Simulation using Concord¶
In [1]:
Copied!
# Enable live reloading of edited modules so changes to the Concord source are
# picked up without restarting the kernel.
# NOTE(review): every cell's source appears twice in this export — an artifact
# of the docs exporter ("Copied!" blocks); duplicates are kept verbatim.
%load_ext autoreload
%autoreload 2
%load_ext autoreload
%autoreload 2
In [2]:
Copied!
# Imports and plotting configuration.
# svg.fonttype='none' and pdf.fonttype=42 keep text editable (real fonts, not
# outlines) in exported SVG/PDF figures.
import numpy as np
import scanpy as sc
import time
from pathlib import Path
import torch
import Concord as ccd
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import matplotlib as mpl
from matplotlib import font_manager, rcParams
custom_rc = {
'font.family': 'Arial',  # Set the desired font for this plot
}
mpl.rcParams['svg.fonttype'] = 'none'
mpl.rcParams['pdf.fonttype'] = 42
import numpy as np
import scanpy as sc
import time
from pathlib import Path
import torch
import Concord as ccd
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import matplotlib as mpl
from matplotlib import font_manager, rcParams
custom_rc = {
'font.family': 'Arial',  # Set the desired font for this plot
}
mpl.rcParams['svg.fonttype'] = 'none'
mpl.rcParams['pdf.fonttype'] = 42
In [3]:
Copied!
# Project setup: save/data directories, compute device, global seed, and a
# timestamped file suffix so repeated runs do not overwrite earlier outputs.
# NOTE(review): 'cuda:3' is a hardcoded GPU index — on a CUDA machine with
# fewer than 4 GPUs this will fail; confirm the device index before running.
proj_name = "simulation_trajectory"
save_dir = f"../save/dev_{proj_name}-{time.strftime('%b%d')}/"
save_dir = Path(save_dir)
save_dir.mkdir(parents=True, exist_ok=True)
data_dir = f"../data/{proj_name}/"
data_dir = Path(data_dir)
data_dir.mkdir(parents=True, exist_ok=True)
device = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')
print(device)
seed = 0
ccd.ul.set_seed(seed)
file_suffix = f"{time.strftime('%b%d-%H%M')}"
file_suffix
proj_name = "simulation_trajectory"
save_dir = f"../save/dev_{proj_name}-{time.strftime('%b%d')}/"
save_dir = Path(save_dir)
save_dir.mkdir(parents=True, exist_ok=True)
data_dir = f"../data/{proj_name}/"
data_dir = Path(data_dir)
data_dir.mkdir(parents=True, exist_ok=True)
device = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')
print(device)
seed = 0
ccd.ul.set_seed(seed)
file_suffix = f"{time.strftime('%b%d-%H%M')}"
file_suffix
cpu
Out[3]:
'Feb16-1614'
In [4]:
Copied!
# Simulation configuration: obs keys and generative settings reused by every
# cell below (state = trajectory over 'time', batch effect = extra
# batch-specific features, normal expression noise).
state_key = 'time'
batch_key = 'batch'
state_type = 'trajectory'
batch_type = 'batch_specific_features'
distribution = 'normal'
leiden_key = 'leiden_no_noise'
state_key = 'time'
batch_key = 'batch'
state_type = 'trajectory'
batch_type = 'batch_specific_features'
distribution = 'normal'
leiden_key = 'leiden_no_noise'
In [46]:
Copied!
# Simulate 1000 cells x 100 genes, 2 batches, a 3-state trajectory.
# `adata` carries the batch-affected data; `adata_state` the ground-truth
# state signal (with and without noise). seed=42 makes the data reproducible.
from Concord.utils.simulation import Simulation
# Create an instance of the Simulation class
sim = Simulation(n_cells=1000, n_genes=100, n_batches=2, n_states=3,
state_type=state_type,
state_distribution = distribution,
state_level=10,
state_min_level=0,
state_dispersion=2.0,
program_structure='linear_bidirectional',
program_on_time_fraction=0.1,
trajectory_program_num=5,
trajectory_cell_block_size_ratio=0.6,
trajectory_loop_to=None,
batch_distribution = distribution,
batch_type=batch_type,
batch_level=[10,10],
batch_dispersion=[2.0, 2.0],
non_neg=True, to_int=True,
seed=42)
# Generate the simulated data
adata, adata_state = sim.simulate_data()
from Concord.utils.simulation import Simulation
# Create an instance of the Simulation class
sim = Simulation(n_cells=1000, n_genes=100, n_batches=2, n_states=3,
state_type=state_type,
state_distribution = distribution,
state_level=10,
state_min_level=0,
state_dispersion=2.0,
program_structure='linear_bidirectional',
program_on_time_fraction=0.1,
trajectory_program_num=5,
trajectory_cell_block_size_ratio=0.6,
trajectory_loop_to=None,
batch_distribution = distribution,
batch_type=batch_type,
batch_level=[10,10],
batch_dispersion=[2.0, 2.0],
non_neg=True, to_int=True,
seed=42)
# Generate the simulated data
adata, adata_state = sim.simulate_data()
Concord.utils.simulation - INFO - Simulating trajectory with 3 states, distribution: normal with mean expression 10 and dispersion 2.0. Concord.utils.simulation - INFO - Simulating batch-specific features effect on batch_1 by appending a set of batch-specific genes with normal distributed value with level 10 and dispersion 2.0. Concord.utils.simulation - INFO - Simulating batch-specific features effect on batch_2 by appending a set of batch-specific genes with normal distributed value with level 10 and dispersion 2.0.
In [47]:
Copied!
# Visualize ground truth (clean and noisy) and the batch-affected data as
# expression heatmaps annotated by state / batch.
ccd.pl.heatmap_with_annotations(adata_state, val='no_noise', obs_keys=[state_key], yticklabels=False, cluster_cols=False, cluster_rows=False, value_annot=False, cmap='viridis', title='True state', save_path=save_dir/f'true_state_heatmap_{file_suffix}.png', figsize=(6, 4), dpi=300)
ccd.pl.heatmap_with_annotations(adata_state, val='wt_noise', obs_keys=[state_key], yticklabels=False, cluster_cols=False, cluster_rows=False, value_annot=False, cmap='viridis', title='True state with noise', save_path=save_dir/f'true_state_with_noise_heatmap_{file_suffix}.png', figsize=(6, 4), dpi=300)
ccd.pl.heatmap_with_annotations(adata, val='X', obs_keys=[state_key, batch_key], yticklabels=False, cluster_cols=False, cluster_rows=False, value_annot=False, cmap='viridis', title='Simulated data with batch signal', save_path=save_dir/f'simulated_data_heatmap_{file_suffix}.png', figsize=(6, 4), dpi=300)
ccd.pl.heatmap_with_annotations(adata_state, val='no_noise', obs_keys=[state_key], yticklabels=False, cluster_cols=False, cluster_rows=False, value_annot=False, cmap='viridis', title='True state', save_path=save_dir/f'true_state_heatmap_{file_suffix}.png', figsize=(6, 4), dpi=300)
ccd.pl.heatmap_with_annotations(adata_state, val='wt_noise', obs_keys=[state_key], yticklabels=False, cluster_cols=False, cluster_rows=False, value_annot=False, cmap='viridis', title='True state with noise', save_path=save_dir/f'true_state_with_noise_heatmap_{file_suffix}.png', figsize=(6, 4), dpi=300)
ccd.pl.heatmap_with_annotations(adata, val='X', obs_keys=[state_key, batch_key], yticklabels=False, cluster_cols=False, cluster_rows=False, value_annot=False, cmap='viridis', title='Simulated data with batch signal', save_path=save_dir/f'simulated_data_heatmap_{file_suffix}.png', figsize=(6, 4), dpi=300)
Out[47]:
<seaborn.matrix.ClusterGrid at 0x7f094c765ca0>
No batch effect, no noise¶
In [48]:
Copied!
# Baseline embedding of the clean (noise-free) ground-truth signal:
# 30-PC PCA, then UMAP on the PCA coordinates.
ccd.ul.run_pca(adata_state, source_key='no_noise', result_key='PCA_no_noise', n_pc=30, random_state=seed)
ccd.ul.run_umap(adata_state, source_key='PCA_no_noise', result_key='UMAP_no_noise', random_state=seed)
ccd.ul.run_pca(adata_state, source_key='no_noise', result_key='PCA_no_noise', n_pc=30, random_state=seed)
ccd.ul.run_umap(adata_state, source_key='PCA_no_noise', result_key='UMAP_no_noise', random_state=seed)
Concord - INFO - PCA performed on source data with 30 components Concord - INFO - PCA embedding stored in adata.obsm['PCA_no_noise'] Concord - INFO - UMAP embedding stored in adata.obsm['UMAP_no_noise']
In [49]:
Copied!
# Leiden clustering on the noise-free PCA space; the labels are copied onto
# `adata` so later integration results can be colored by "true" clusters.
sc.pp.neighbors(adata_state, use_rep='PCA_no_noise', n_neighbors=30, random_state=seed)
sc.tl.leiden(adata_state, resolution=1.0, key_added=leiden_key, random_state=seed)
adata.obs[leiden_key] = adata_state.obs[leiden_key]
sc.pp.neighbors(adata_state, use_rep='PCA_no_noise', n_neighbors=30, random_state=seed)
sc.tl.leiden(adata_state, resolution=1.0, key_added=leiden_key, random_state=seed)
adata.obs[leiden_key] = adata_state.obs[leiden_key]
In [26]:
Copied!
# PCA of the clean signal, colored by time, leiden cluster, and batch.
show_basis = 'PCA_no_noise'
show_cols = [state_key, leiden_key, batch_key]
ccd.pl.plot_embedding(
adata_state, show_basis, show_cols, figsize=(7.5,2.5), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data', rasterized=False,
save_path=save_dir / f"nobatch_{show_basis}_{file_suffix}.pdf"
)
show_basis = 'PCA_no_noise'
show_cols = [state_key, leiden_key, batch_key]
ccd.pl.plot_embedding(
adata_state, show_basis, show_cols, figsize=(7.5,2.5), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data', rasterized=False,
save_path=save_dir / f"nobatch_{show_basis}_{file_suffix}.pdf"
)
In [51]:
Copied!
# UMAP of the clean signal, colored by time and leiden cluster.
show_basis = 'UMAP_no_noise'
show_cols = [state_key, leiden_key]
ccd.pl.plot_embedding(
adata_state, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data',
save_path=save_dir / f"nobatch_{show_basis}_{file_suffix}.png"
)
show_basis = 'UMAP_no_noise'
show_cols = [state_key, leiden_key]
ccd.pl.plot_embedding(
adata_state, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data',
save_path=save_dir / f"nobatch_{show_basis}_{file_suffix}.png"
)
No batch effect, noise added, PCA and UMAP¶
In [52]:
Copied!
# Switch adata_state.X to the noisy layer and embed it.
# NOTE(review): this mutates adata_state.X in place — cells above that used
# the clean X are stale after this point; re-run order matters.
adata_state.X = adata_state.layers['wt_noise'].copy()
ccd.ul.run_pca(adata_state, source_key='wt_noise', result_key='PCA_wt_noise', n_pc=30, random_state=seed)
ccd.ul.run_umap(adata_state, source_key='PCA_wt_noise', result_key='UMAP_wt_noise', random_state=seed)
adata_state.X = adata_state.layers['wt_noise'].copy()
ccd.ul.run_pca(adata_state, source_key='wt_noise', result_key='PCA_wt_noise', n_pc=30, random_state=seed)
ccd.ul.run_umap(adata_state, source_key='PCA_wt_noise', result_key='UMAP_wt_noise', random_state=seed)
Concord - INFO - PCA performed on source data with 30 components Concord - INFO - PCA embedding stored in adata.obsm['PCA_wt_noise'] Concord - INFO - UMAP embedding stored in adata.obsm['UMAP_wt_noise']
In [25]:
Copied!
# PCA of the noisy (batch-free) signal.
show_basis = 'PCA_wt_noise'
show_cols = [state_key, leiden_key, batch_key]
ccd.pl.plot_embedding(
adata_state, show_basis, show_cols, figsize=(7.5,2.5), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data', rasterized=False,
save_path=save_dir / f"nobatch_{show_basis}_{file_suffix}.pdf"
)
show_basis = 'PCA_wt_noise'
show_cols = [state_key, leiden_key, batch_key]
ccd.pl.plot_embedding(
adata_state, show_basis, show_cols, figsize=(7.5,2.5), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data', rasterized=False,
save_path=save_dir / f"nobatch_{show_basis}_{file_suffix}.pdf"
)
In [54]:
Copied!
# UMAP of the noisy (batch-free) signal.
show_basis = 'UMAP_wt_noise'
show_cols = [state_key, leiden_key]
ccd.pl.plot_embedding(
adata_state, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data',
save_path=save_dir / f"nobatch_{show_basis}_{file_suffix}.png"
)
show_basis = 'UMAP_wt_noise'
show_cols = [state_key, leiden_key]
ccd.pl.plot_embedding(
adata_state, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data',
save_path=save_dir / f"nobatch_{show_basis}_{file_suffix}.png"
)
No batch correction, PCA and UMAP¶
In [55]:
Copied!
# Uncorrected baseline on the batch-affected data: full-rank PCA (bounded by
# n_obs/n_vars), kNN graph, and UMAP. The PCA is stored under "Unintegrated"
# for downstream benchmarking.
n_pcs = min(adata.n_obs, adata.n_vars)-1
sc.pp.pca(adata, n_comps=n_pcs)
sc.pp.neighbors(adata, n_neighbors=30, n_pcs=n_pcs)
sc.tl.umap(adata, min_dist=0.5)
adata.obsm["Unintegrated"] = adata.obsm["X_pca"]
n_pcs = min(adata.n_obs, adata.n_vars)-1
sc.pp.pca(adata, n_comps=n_pcs)
sc.pp.neighbors(adata, n_neighbors=30, n_pcs=n_pcs)
sc.tl.umap(adata, min_dist=0.5)
adata.obsm["Unintegrated"] = adata.obsm["X_pca"]
In [56]:
Copied!
# Uncorrected PCA plot — batch separation should be visible here.
show_basis = 'X_pca'
show_cols = [state_key, batch_key, leiden_key]
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data',
save_path=save_dir / f"wtbatch_{show_basis}_{file_suffix}.png"
)
show_basis = 'X_pca'
show_cols = [state_key, batch_key, leiden_key]
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data',
save_path=save_dir / f"wtbatch_{show_basis}_{file_suffix}.png"
)
In [57]:
Copied!
# Uncorrected UMAP plot. Note: reuses `show_cols` set in the previous cell —
# this cell is not self-contained.
show_basis = 'X_umap'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data',
save_path=save_dir / f"wtbatch_{show_basis}_{file_suffix}.png"
)
show_basis = 'X_umap'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=20, legend_loc='on data',
save_path=save_dir / f"wtbatch_{show_basis}_{file_suffix}.png"
)
Concord¶
In [100]:
Copied!
# First Concord run: train on raw X with batch as the domain key, then save
# all obsm embeddings to HDF5 for later reuse.
cur_ccd = ccd.Concord(adata=adata,
input_feature=None,
batch_size=64,
n_epochs=10,
domain_key=batch_key, # key indicating batch
seed=seed, # random seed
verbose=False, # print training progress
device=device, # device to run on
save_dir=save_dir # directory to save model checkpoints
)
# Encode data, saving the latent embedding in adata.obsm['Concord']
output_key = 'Concord'
cur_ccd.encode_adata(input_layer_key='X', output_key=output_key, preprocess=False)
# Save the latent embedding to a file, so that it can be loaded later
ccd.ul.save_obsm_to_hdf5(cur_ccd.adata, save_dir / f"obsm_{file_suffix}.h5")
cur_ccd = ccd.Concord(adata=adata,
input_feature=None,
batch_size=64,
n_epochs=10,
domain_key=batch_key, # key indicating batch
seed=seed, # random seed
verbose=False, # print training progress
device=device, # device to run on
save_dir=save_dir # directory to save model checkpoints
)
# Encode data, saving the latent embedding in adata.obsm['Concord']
output_key = 'Concord'
cur_ccd.encode_adata(input_layer_key='X', output_key=output_key, preprocess=False)
# Save the latent embedding to a file, so that it can be loaded later
ccd.ul.save_obsm_to_hdf5(cur_ccd.adata, save_dir / f"obsm_{file_suffix}.h5")
Concord - WARNING - No input feature list provided. It is recommended to first select features using the command `concord.ul.select_features()`.
WARNING clustering 1000 points to 31 centroids: please provide at least 1209 training points
p_intra_knn: 0.3
Epoch 0 Training: 14it [00:00, 64.20it/s, loss=4.07] Epoch 1 Training: 100%|██████████| 14/14 [00:00<00:00, 70.18it/s, loss=3.96] Epoch 2 Training: 100%|██████████| 14/14 [00:00<00:00, 74.94it/s, loss=3.89] Epoch 3 Training: 100%|██████████| 14/14 [00:00<00:00, 80.20it/s, loss=3.78] Epoch 4 Training: 100%|██████████| 14/14 [00:00<00:00, 79.21it/s, loss=3.78] Epoch 5 Training: 100%|██████████| 14/14 [00:00<00:00, 71.98it/s, loss=3.75] Epoch 6 Training: 100%|██████████| 14/14 [00:00<00:00, 68.10it/s, loss=3.8] Epoch 7 Training: 100%|██████████| 14/14 [00:00<00:00, 42.68it/s, loss=3.77] Epoch 8 Training: 100%|██████████| 14/14 [00:00<00:00, 42.76it/s, loss=3.82] Epoch 9 Training: 100%|██████████| 14/14 [00:00<00:00, 36.47it/s, loss=3.78]
In [101]:
Copied!
# Heatmap of the Concord latent space, annotated by state and batch.
# NOTE(review): `pal` keys are 'cluster'/'batch' but obs_keys are
# 'time'/'batch' — the 'cluster' palette may not apply; verify against
# heatmap_with_annotations' palette matching.
ccd.pl.heatmap_with_annotations(adata, val='Concord', obs_keys=[state_key, batch_key],
cluster_cols=True, cluster_rows=True, cmap='viridis',
pal = {'cluster': 'Set1', 'batch':'Set2'}, add_color_legend=True,
save_path=save_dir/f'Concord_latent_heatmap_{file_suffix}.png')
ccd.pl.heatmap_with_annotations(adata, val='Concord', obs_keys=[state_key, batch_key],
cluster_cols=True, cluster_rows=True, cmap='viridis',
pal = {'cluster': 'Set1', 'batch':'Set2'}, add_color_legend=True,
save_path=save_dir/f'Concord_latent_heatmap_{file_suffix}.png')
Out[101]:
<seaborn.matrix.ClusterGrid at 0x7f081402c070>
In [102]:
Copied!
# Cell-by-cell cosine similarity of the Concord latent embedding, shown as a
# clustered heatmap annotated by state and batch.
cos_mtx = ccd.ul.cosine_sim(adata.obsm['Concord'])
ccd.pl.heatmap_with_annotations(adata, val=cos_mtx,
obs_keys=[state_key, batch_key], cluster_cols=True, cluster_rows=True, cmap='viridis', save_path=save_dir/f'Concord_cosine_{file_suffix}.png')
# Compute the correlation matrix
cos_mtx = ccd.ul.cosine_sim(adata.obsm['Concord'])
ccd.pl.heatmap_with_annotations(adata, val=cos_mtx,
obs_keys=[state_key, batch_key], cluster_cols=True, cluster_rows=True, cmap='viridis', save_path=save_dir/f'Concord_cosine_{file_suffix}.png')
Out[102]:
<seaborn.matrix.ClusterGrid at 0x7f07f40f1b20>
In [103]:
Copied!
# PCA on the Concord latent space, then plot.
# NOTE(review): the PCA result is stored under f'{output_key}_PCA' but the
# plot below shows `output_key` (raw latent dims 1-2), not the PCA — confirm
# whether '{output_key}_PCA' was intended as show_basis.
n_pc = min(adata.obsm[output_key].shape[1], adata.shape[0]) - 1
ccd.ul.run_pca(adata, source_key=output_key, result_key=f'{output_key}_PCA', n_pc=n_pc, random_state=seed)
show_basis = f'{output_key}'
show_cols = [state_key, batch_key, leiden_key]
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
n_pc = min(adata.obsm[output_key].shape[1], adata.shape[0]) - 1
ccd.ul.run_pca(adata, source_key=output_key, result_key=f'{output_key}_PCA', n_pc=n_pc, random_state=seed)
show_basis = f'{output_key}'
show_cols = [state_key, batch_key, leiden_key]
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
In [ ]:
Copied!
# Build a kNN graph and a 2-D UMAP on the Concord latent space, then plot it
# colored by state, batch, and leiden cluster.
# FIX: the original passed `adata_b1` to run_umap, a name never defined in
# this notebook (NameError on Restart & Run All); the embedding belongs on
# `adata`, which is what plot_embedding reads below.
sc.pp.neighbors(adata, n_neighbors=30, use_rep = 'Concord')
ccd.ul.run_umap(adata, source_key='Concord', result_key='Concord_UMAP_2D', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_cols = [state_key, batch_key, leiden_key]
show_basis = 'Concord_UMAP_2D'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
#pal = {'cluster':'Set1', 'batch':'Set2', 'leiden':'tab20'},
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
sc.pp.neighbors(adata, n_neighbors=30, use_rep = 'Concord')
ccd.ul.run_umap(adata, source_key='Concord', result_key='Concord_UMAP_2D', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_cols = [state_key, batch_key, leiden_key]
show_basis = 'Concord_UMAP_2D'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
#pal = {'cluster':'Set1', 'batch':'Set2', 'leiden':'tab20'},
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
In [105]:
Copied!
# Persist both AnnData objects (with all embeddings) for later analysis.
adata.write_h5ad(data_dir / f"adata_{file_suffix}.h5ad")
adata_state.write_h5ad(data_dir / f"adata_state_{file_suffix}.h5ad")
adata.write_h5ad(data_dir / f"adata_{file_suffix}.h5ad")
adata_state.write_h5ad(data_dir / f"adata_state_{file_suffix}.h5ad")
In [106]:
Copied!
# Echo the suffix so the saved file names can be located later.
file_suffix
file_suffix
Out[106]:
'Nov16-1334'
Run other methods¶
In [8]:
Copied!
# Benchmark setup: refresh the timestamp suffix, enable verbose logging, and
# start a timer/time-log for per-method wall-clock comparison.
file_suffix = f"{time.strftime('%b%d-%H%M')}"
ccd.set_verbose_mode(True)
show_cols = [state_key, batch_key]
timer = ccd.ul.Timer()
time_log = {}
file_suffix = f"{time.strftime('%b%d-%H%M')}"
ccd.set_verbose_mode(True)
show_cols = [state_key, batch_key]
timer = ccd.ul.Timer()
time_log = {}
Scanorama¶
In [69]:
Copied!
# Scanorama integration (timed); embeddings saved to HDF5.
output_key = 'Scanorama'
with timer:
ccd.ul.run_scanorama(adata, batch_key="batch", output_key=output_key, return_corrected=True)
time_log[output_key] = timer.interval
ccd.ul.save_obsm_to_hdf5(adata, save_dir / f"obsm_{file_suffix}.h5")
output_key = 'Scanorama'
with timer:
ccd.ul.run_scanorama(adata, batch_key="batch", output_key=output_key, return_corrected=True)
time_log[output_key] = timer.interval
ccd.ul.save_obsm_to_hdf5(adata, save_dir / f"obsm_{file_suffix}.h5")
Found 120 genes among all datasets [[0. 0.992] [0. 0. ]] Processing datasets (0, 1) Found 120 genes among all datasets [[0. 0.992] [0. 0. ]] Processing datasets (0, 1)
In [70]:
Copied!
# Plot heatmap of Scanorama corrected data
ccd.pl.heatmap_with_annotations(adata, val=f'{output_key}_corrected', obs_keys=[state_key, batch_key],
cluster_cols=False, cluster_rows=False, cmap='viridis',
pal = {'cluster': 'Set1', 'batch':'Set2'}, add_color_legend=True,
save_path=save_dir/f'{output_key}_corrected_heatmap_{file_suffix}.png')
# Plot heatmap of Scanorama corrected data
ccd.pl.heatmap_with_annotations(adata, val=f'{output_key}_corrected', obs_keys=[state_key, batch_key],
cluster_cols=False, cluster_rows=False, cmap='viridis',
pal = {'cluster': 'Set1', 'batch':'Set2'}, add_color_legend=True,
save_path=save_dir/f'{output_key}_corrected_heatmap_{file_suffix}.png')
Out[70]:
<seaborn.matrix.ClusterGrid at 0x7f08ec5a8b50>
In [71]:
Copied!
# UMAP of the Scanorama embedding, colored by state and batch.
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
Concord - INFO - UMAP embedding stored in adata.obsm['Scanorama_UMAP']
Liger¶
In [72]:
Copied!
# LIGER integration (timed). LIGER needs counts, so raw X is stashed in a
# "counts" layer first.
output_key = 'Liger'
adata.layers["counts"] = adata.X.copy()
with timer:
ccd.ul.run_liger(adata, batch_key="batch", count_layer="counts", output_key=output_key, k=30, return_corrected=True)
time_log[output_key] = timer.interval
ccd.ul.save_obsm_to_hdf5(adata, save_dir / f"obsm_{file_suffix}.h5")
output_key = 'Liger'
adata.layers["counts"] = adata.X.copy()
with timer:
ccd.ul.run_liger(adata, batch_key="batch", count_layer="counts", output_key=output_key, k=30, return_corrected=True)
time_log[output_key] = timer.interval
ccd.ul.save_obsm_to_hdf5(adata, save_dir / f"obsm_{file_suffix}.h5")
100%|██████████| 30/30 [00:04<00:00, 6.75it/s]
In [73]:
Copied!
# Plot heatmap of Liger corrected data (comment fixed: previously said
# "Scanorama" — output_key is 'Liger' here)
ccd.pl.heatmap_with_annotations(adata, val=f'{output_key}_corrected', obs_keys=[state_key, batch_key],
cluster_cols=False, cluster_rows=False, cmap='viridis',
pal = {'cluster': 'Set1', 'batch':'Set2'}, add_color_legend=True,
save_path=save_dir/f'{output_key}_corrected_heatmap_{file_suffix}.png')
# Plot heatmap of Liger corrected data
ccd.pl.heatmap_with_annotations(adata, val=f'{output_key}_corrected', obs_keys=[state_key, batch_key],
cluster_cols=False, cluster_rows=False, cmap='viridis',
pal = {'cluster': 'Set1', 'batch':'Set2'}, add_color_legend=True,
save_path=save_dir/f'{output_key}_corrected_heatmap_{file_suffix}.png')
Out[73]:
<seaborn.matrix.ClusterGrid at 0x7f094c2c97f0>
In [74]:
Copied!
# UMAP of the Liger embedding.
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
Concord - INFO - UMAP embedding stored in adata.obsm['Liger_UMAP']
Harmony¶
In [75]:
Copied!
# Harmony integration (timed), run on the precomputed uncorrected PCA.
output_key = 'Harmony'
with timer:
ccd.ul.run_harmony(adata, batch_key="batch", input_key='X_pca', output_key=output_key)
time_log[output_key] = timer.interval
ccd.ul.save_obsm_to_hdf5(adata, save_dir / f"obsm_{file_suffix}.h5")
output_key = 'Harmony'
with timer:
ccd.ul.run_harmony(adata, batch_key="batch", input_key='X_pca', output_key=output_key)
time_log[output_key] = timer.interval
ccd.ul.save_obsm_to_hdf5(adata, save_dir / f"obsm_{file_suffix}.h5")
Initialization is completed. Completed 1 / 10 iteration(s). Completed 2 / 10 iteration(s). Completed 3 / 10 iteration(s). Completed 4 / 10 iteration(s). Completed 5 / 10 iteration(s). Completed 6 / 10 iteration(s). Completed 7 / 10 iteration(s). Reach convergence after 7 iteration(s).
In [76]:
Copied!
# UMAP of the Harmony embedding.
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
Concord - INFO - UMAP embedding stored in adata.obsm['Harmony_UMAP']
scVI¶
In [77]:
Copied!
# scVI integration (timed); the trained model is kept so scANVI can be
# initialized from it below. Corrected expression is decoded as if all cells
# came from `transform_batch`.
output_key = 'scVI'
transform_batch = 'batch_1'
with timer:
scvi_vae = ccd.ul.run_scvi(adata, batch_key="batch", output_key=output_key, return_model=True, return_corrected=True, transform_batch=transform_batch)
time_log[output_key] = timer.interval
ccd.ul.save_obsm_to_hdf5(adata, save_dir / f"obsm_{file_suffix}.h5")
output_key = 'scVI'
transform_batch = 'batch_1'
with timer:
scvi_vae = ccd.ul.run_scvi(adata, batch_key="batch", output_key=output_key, return_model=True, return_corrected=True, transform_batch=transform_batch)
time_log[output_key] = timer.interval
ccd.ul.save_obsm_to_hdf5(adata, save_dir / f"obsm_{file_suffix}.h5")
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary. GPU available: True (cuda), used: True TPU available: False, using: 0 TPU cores HPU available: False, using: 0 HPUs LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Epoch 400/400: 100%|██████████| 400/400 [00:41<00:00, 9.17it/s, v_num=1, train_loss_step=196, train_loss_epoch=196]
`Trainer.fit` stopped: `max_epochs=400` reached.
Epoch 400/400: 100%|██████████| 400/400 [00:41<00:00, 9.66it/s, v_num=1, train_loss_step=196, train_loss_epoch=196]
In [78]:
Copied!
# Heatmap of scVI-corrected expression (decoded into transform_batch).
ccd.pl.heatmap_with_annotations(adata, val=f'{output_key}_corrected_{transform_batch}', obs_keys=[state_key, batch_key],
cluster_cols=False, cluster_rows=False, cmap='viridis',
pal = {'cluster': 'Set1', 'batch':'Set2'}, add_color_legend=True,
save_path=save_dir/f'{output_key}_{transform_batch}_corrected_heatmap_{file_suffix}.png')
ccd.pl.heatmap_with_annotations(adata, val=f'{output_key}_corrected_{transform_batch}', obs_keys=[state_key, batch_key],
cluster_cols=False, cluster_rows=False, cmap='viridis',
pal = {'cluster': 'Set1', 'batch':'Set2'}, add_color_legend=True,
save_path=save_dir/f'{output_key}_{transform_batch}_corrected_heatmap_{file_suffix}.png')
Out[78]:
<seaborn.matrix.ClusterGrid at 0x7f08b4238bb0>
In [79]:
Copied!
# UMAP of the scVI latent embedding.
output_key = 'scVI'
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
output_key = 'scVI'
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
Concord - INFO - UMAP embedding stored in adata.obsm['scVI_UMAP']
scANVI¶
In [80]:
Copied!
# scANVI integration (timed), initialized from the scVI model and supervised
# with the leiden labels derived from the noise-free ground truth.
output_key = 'scANVI'
with timer:
ccd.ul.run_scanvi(adata, batch_key="batch", labels_key=leiden_key, output_key=output_key, scvi_model=scvi_vae, return_corrected=True, transform_batch=transform_batch)
time_log[output_key] = timer.interval
ccd.ul.save_obsm_to_hdf5(adata, save_dir / f"obsm_{file_suffix}.h5")
output_key = 'scANVI'
with timer:
ccd.ul.run_scanvi(adata, batch_key="batch", labels_key=leiden_key, output_key=output_key, scvi_model=scvi_vae, return_corrected=True, transform_batch=transform_batch)
time_log[output_key] = timer.interval
ccd.ul.save_obsm_to_hdf5(adata, save_dir / f"obsm_{file_suffix}.h5")
INFO Training for 20 epochs.
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary. GPU available: True (cuda), used: True TPU available: False, using: 0 TPU cores HPU available: False, using: 0 HPUs LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Epoch 20/20: 100%|██████████| 20/20 [00:05<00:00, 4.35it/s, v_num=1, train_loss_step=310, train_loss_epoch=232]
`Trainer.fit` stopped: `max_epochs=20` reached.
Epoch 20/20: 100%|██████████| 20/20 [00:05<00:00, 3.73it/s, v_num=1, train_loss_step=310, train_loss_epoch=232]
In [81]:
Copied!
# Plot heatmap of scANVI corrected data (comment fixed: previously said
# "Scanorama" — output_key is 'scANVI' here)
ccd.pl.heatmap_with_annotations(adata, val=f'{output_key}_corrected_{transform_batch}', obs_keys=[state_key, batch_key],
cluster_cols=False, cluster_rows=False, cmap='viridis',
pal = {'cluster': 'Set1', 'batch':'Set2'}, add_color_legend=True,
save_path=save_dir/f'{output_key}_{transform_batch}_corrected_heatmap_{file_suffix}.png')
# Plot heatmap of scANVI corrected data
ccd.pl.heatmap_with_annotations(adata, val=f'{output_key}_corrected_{transform_batch}', obs_keys=[state_key, batch_key],
cluster_cols=False, cluster_rows=False, cmap='viridis',
pal = {'cluster': 'Set1', 'batch':'Set2'}, add_color_legend=True,
save_path=save_dir/f'{output_key}_{transform_batch}_corrected_heatmap_{file_suffix}.png')
Out[81]:
<seaborn.matrix.ClusterGrid at 0x7f08f4786cd0>
In [82]:
Copied!
# UMAP of the scANVI latent embedding.
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
Concord - INFO - UMAP embedding stored in adata.obsm['scANVI_UMAP']
Concord¶
In [6]:
Copied!
# Lower bound on the probability of sampling intra-domain (within-batch)
# pairs during Concord training; passed to the benchmark run below.
min_p_intra_domain=0.95
min_p_intra_domain=0.95
In [28]:
Copied!
# Benchmark Concord run (timed, verbose) with min_p_intra_domain set, so its
# wall clock is comparable with the other methods in time_log.
cur_ccd = ccd.Concord(adata=adata,
input_feature=None,
batch_size=64,
n_epochs=10,
domain_key=batch_key, # key indicating batch
min_p_intra_domain=min_p_intra_domain, # probability of sampling intra-domain pairs
seed=seed, # random seed
verbose=True, # print training progress
device=device, # device to run on
save_dir=save_dir # directory to save model checkpoints
)
# Encode data, saving the latent embedding in adata.obsm['Concord']
output_key = 'Concord'
with timer:
cur_ccd.encode_adata(input_layer_key='X', output_key=output_key, preprocess=False)
time_log[output_key] = timer.interval
# Save the latent embedding to a file, so that it can be loaded later
ccd.ul.save_obsm_to_hdf5(cur_ccd.adata, save_dir / f"obsm_{file_suffix}.h5")
cur_ccd = ccd.Concord(adata=adata,
input_feature=None,
batch_size=64,
n_epochs=10,
domain_key=batch_key, # key indicating batch
min_p_intra_domain=min_p_intra_domain, # probability of sampling intra-domain pairs
seed=seed, # random seed
verbose=True, # print training progress
device=device, # device to run on
save_dir=save_dir # directory to save model checkpoints
)
# Encode data, saving the latent embedding in adata.obsm['Concord']
output_key = 'Concord'
with timer:
cur_ccd.encode_adata(input_layer_key='X', output_key=output_key, preprocess=False)
time_log[output_key] = timer.interval
# Save the latent embedding to a file, so that it can be loaded later
ccd.ul.save_obsm_to_hdf5(cur_ccd.adata, save_dir / f"obsm_{file_suffix}.h5")
Concord - INFO - Setting sampler_knn to 100 to be 1/10 the number of cells in the dataset. You can change this value by setting sampler_knn in the configuration. Concord - WARNING - No input feature list provided. It is recommended to first select features using the command `concord.ul.select_features()`. Concord - INFO - Proceeding with all 120 features in the dataset. Concord - INFO - Column 'batch' is already of type: category Concord - INFO - Unused levels dropped for column 'batch'. Concord - INFO - Encoder input dim: 120 Concord - INFO - Model loaded to device: cuda:3 Concord - INFO - Total number of parameters: 19952 Concord.model.anndataset - INFO - Initialized dataset with 1000 samples. Data structure: ['input', 'domain', 'idx'] Concord - INFO - Using existing embedding 'X_pca' from adata.obsm Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss IVF index. nprobe=10 Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.dataloader - INFO - Number of unique_domains: 2
WARNING clustering 1000 points to 31 centroids: please provide at least 1209 training points
Concord.model.dataloader - INFO - Calculating each domain's coverage of the global manifold using X_pca.
Concord.model.dataloader - INFO - Converting coverage {'batch_1': 0.53, 'batch_2': 0.545} to p_intra_domain...
Concord.model.dataloader - INFO - Final p_intra_domain values: batch_1: 0.98, batch_2: 0.98
p_intra_knn: 0.5
Concord - INFO - Starting epoch 1/10
Concord - INFO - Processing chunk 1/1 for epoch 1
Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 0 Training: 14it [00:00, 103.37it/s, loss=4.19]
Concord - INFO - Epoch 0 | Train Loss: 4.38, MSE: 0.00, CLASS: 0.00, CONTRAST: 4.38, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 2/10
Concord - INFO - Processing chunk 1/1 for epoch 2 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 1 Training: 100%|██████████| 14/14 [00:00<00:00, 85.60it/s, loss=4.08]
Concord - INFO - Epoch 1 | Train Loss: 4.10, MSE: 0.00, CLASS: 0.00, CONTRAST: 4.10, IMPORTANCE: 0.00
Concord - INFO - Starting epoch 3/10 Concord - INFO - Processing chunk 1/1 for epoch 3 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 2 Training: 100%|██████████| 14/14 [00:00<00:00, 98.71it/s, loss=4.01]
Concord - INFO - Epoch 2 | Train Loss: 4.06, MSE: 0.00, CLASS: 0.00, CONTRAST: 4.06, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 4/10
Concord - INFO - Processing chunk 1/1 for epoch 4 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 3 Training: 100%|██████████| 14/14 [00:00<00:00, 101.67it/s, loss=4.01]
Concord - INFO - Epoch 3 | Train Loss: 4.03, MSE: 0.00, CLASS: 0.00, CONTRAST: 4.03, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 5/10
Concord - INFO - Processing chunk 1/1 for epoch 5 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 4 Training: 100%|██████████| 14/14 [00:00<00:00, 102.88it/s, loss=3.97]
Concord - INFO - Epoch 4 | Train Loss: 3.95, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.95, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 6/10
Concord - INFO - Processing chunk 1/1 for epoch 6 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 5 Training: 100%|██████████| 14/14 [00:00<00:00, 97.58it/s, loss=4.02]
Concord - INFO - Epoch 5 | Train Loss: 3.92, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.92, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 7/10
Concord - INFO - Processing chunk 1/1 for epoch 7 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 6 Training: 100%|██████████| 14/14 [00:00<00:00, 99.52it/s, loss=3.93]
Concord - INFO - Epoch 6 | Train Loss: 3.87, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.87, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 8/10
Concord - INFO - Processing chunk 1/1 for epoch 8 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 7 Training: 100%|██████████| 14/14 [00:00<00:00, 103.13it/s, loss=3.79]
Concord - INFO - Epoch 7 | Train Loss: 3.89, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.89, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 9/10
Concord - INFO - Processing chunk 1/1 for epoch 9 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 8 Training: 100%|██████████| 14/14 [00:00<00:00, 99.00it/s, loss=3.89]
Concord - INFO - Epoch 8 | Train Loss: 3.84, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.84, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 10/10
Concord - INFO - Processing chunk 1/1 for epoch 10 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 9 Training: 100%|██████████| 14/14 [00:00<00:00, 99.82it/s, loss=3.8]
Concord - INFO - Epoch 9 | Train Loss: 3.87, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.87, IMPORTANCE: 0.00
Concord - INFO - Model saved to ../save/dev_simulation_trajectory-Nov24/final_model.pth Concord - INFO - Final model saved at: ../save/dev_simulation_trajectory-Nov24/final_model.pth; Configuration saved at: ../save/dev_simulation_trajectory-Nov24/config.json. Concord.model.anndataset - INFO - Initialized dataset with 1000 samples. Data structure: ['input', 'domain', 'idx'] Concord - INFO - Predicting for chunk 1/1
In [29]:
Copied!
# Project the Concord latent embedding ('Concord') to 2D with UMAP and plot it
# colored by the metadata columns in show_cols (defined in an earlier cell);
# the figure is also saved to save_dir as a PNG.
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
Concord - INFO - UMAP embedding stored in adata.obsm['Concord_UMAP']
Concord with decoder¶
In [32]:
Copied!
# Train a Concord variant with a decoder head (use_decoder=True) so the latent
# space can later be decoded back to expression space per batch/domain.
# A class key is supplied but the classifier head is disabled here.
# NOTE(review): the export stripped indentation under `with timer:` below.
decoder_ccd = ccd.Concord(adata=adata,
input_feature=None,
batch_size=64,
n_epochs=10,
domain_key=batch_key, # key indicating batch
class_key=leiden_key, # key indicating class
use_classifier=False, # use classifier
use_decoder=True,
min_p_intra_domain=min_p_intra_domain, # probability of sampling intra-domain pairs
domain_embedding_dim=8,
seed=seed, # random seed
verbose=False, # print training progress
device=device, # device to run on
save_dir=save_dir # directory to save model checkpoints
)
output_key = 'Concord-decoder'
with timer:
decoder_ccd.encode_adata(input_layer_key='X', output_key=output_key, preprocess=False)
time_log[output_key] = timer.interval
# Save the latent embedding to a file, so that it can be loaded later
ccd.ul.save_obsm_to_hdf5(decoder_ccd.adata, save_dir / f"obsm_{file_suffix}.h5")
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
decoder_ccd = ccd.Concord(adata=adata,
input_feature=None,
batch_size=64,
n_epochs=10,
domain_key=batch_key, # key indicating batch
class_key=leiden_key, # key indicating class
use_classifier=False, # use classifier
use_decoder=True,
min_p_intra_domain=min_p_intra_domain, # probability of sampling intra-domain pairs
domain_embedding_dim=8,
seed=seed, # random seed
verbose=False, # print training progress
device=device, # device to run on
save_dir=save_dir # directory to save model checkpoints
)
output_key = 'Concord-decoder'
with timer:
decoder_ccd.encode_adata(input_layer_key='X', output_key=output_key, preprocess=False)
time_log[output_key] = timer.interval
# Save the latent embedding to a file, so that it can be loaded later
ccd.ul.save_obsm_to_hdf5(decoder_ccd.adata, save_dir / f"obsm_{file_suffix}.h5")
Concord - WARNING - No input feature list provided. It is recommended to first select features using the command `concord.ul.select_features()`.
WARNING clustering 1000 points to 31 centroids: please provide at least 1209 training points
p_intra_knn: 0.5
Epoch 0 Training: 14it [00:00, 79.42it/s, loss=15.9] Epoch 1 Training: 100%|██████████| 14/14 [00:00<00:00, 90.65it/s, loss=8.4] Epoch 2 Training: 100%|██████████| 14/14 [00:00<00:00, 91.55it/s, loss=8.98] Epoch 3 Training: 100%|██████████| 14/14 [00:00<00:00, 91.17it/s, loss=8.14] Epoch 4 Training: 100%|██████████| 14/14 [00:00<00:00, 91.58it/s, loss=8.15] Epoch 5 Training: 100%|██████████| 14/14 [00:00<00:00, 76.94it/s, loss=7.27] Epoch 6 Training: 100%|██████████| 14/14 [00:00<00:00, 89.47it/s, loss=7.17] Epoch 7 Training: 100%|██████████| 14/14 [00:00<00:00, 68.10it/s, loss=7.57] Epoch 8 Training: 100%|██████████| 14/14 [00:00<00:00, 89.34it/s, loss=7.2] Epoch 9 Training: 100%|██████████| 14/14 [00:00<00:00, 92.33it/s, loss=7.09]
In [33]:
Copied!
# UMAP + plot for the 'Concord-decoder' latent (output_key set in the
# previous cell); same visualization recipe as for the plain Concord latent.
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
In [24]:
Copied!
# Predict and store the results
# Decode the shared latent space into each batch's domain, producing one
# batch-corrected expression layer per batch (e.g. 'Concord-decoder_decoded_batch_1').
# NOTE(review): the export stripped the for-loop body indentation; also,
# return_class=True / return_class_prob=True are requested although this model
# was created with use_classifier=False — confirm those outputs are meaningful,
# since only the decoded matrix (second return value) is kept.
decoder_domains = adata.obs[batch_key].unique()
for domain in decoder_domains:
_, decoded, _, _, _, _ = decoder_ccd.predict(decoder_ccd.loader, return_decoded=True, decoder_domain=domain, return_class=True, return_class_prob=True)
save_key = f"{output_key}_decoded_{domain}"
adata.layers[save_key] = decoded
adata.layers
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
# Predict and store the results
decoder_domains = adata.obs[batch_key].unique()
for domain in decoder_domains:
_, decoded, _, _, _, _ = decoder_ccd.predict(decoder_ccd.loader, return_decoded=True, decoder_domain=domain, return_class=True, return_class_prob=True)
save_key = f"{output_key}_decoded_{domain}"
adata.layers[save_key] = decoded
# Last expression of the cell: displays the layer keys (see Out[24] below).
adata.layers
Out[24]:
Layers with keys: Concord-decoder_decoded_batch_1, Concord-decoder_decoded_batch_2, Liger_corrected, Scanorama_corrected, counts, scANVI_corrected_batch_1, scVI_corrected_batch_1, wt_noise
Concord with classifier¶
In [30]:
Copied!
# Train a Concord variant with the classifier head enabled, using the Leiden
# cluster labels as supervised classes; latent stored as 'Concord-class'.
# NOTE(review): the export stripped indentation under `with timer:` below.
cur_ccd = ccd.Concord(adata=adata,
input_feature=None,
batch_size=64,
n_epochs=10,
domain_key=batch_key, # key indicating batch
class_key=leiden_key, # key indicating class
use_classifier=True, # use classifier
seed=seed, # random seed
min_p_intra_domain=min_p_intra_domain, # probability of sampling intra-domain pairs
verbose=False, # print training progress
device=device, # device to run on
save_dir=save_dir # directory to save model checkpoints
)
# Encode data, saving the latent embedding in adata.obsm['Concord']
output_key = 'Concord-class'
with timer:
cur_ccd.encode_adata(input_layer_key='X', output_key=output_key, preprocess=False)
time_log[output_key] = timer.interval
# Save the latent embedding to a file, so that it can be loaded later
ccd.ul.save_obsm_to_hdf5(cur_ccd.adata, save_dir / f"obsm_{file_suffix}.h5")
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
cur_ccd = ccd.Concord(adata=adata,
input_feature=None,
batch_size=64,
n_epochs=10,
domain_key=batch_key, # key indicating batch
class_key=leiden_key, # key indicating class
use_classifier=True, # use classifier
seed=seed, # random seed
min_p_intra_domain=min_p_intra_domain, # probability of sampling intra-domain pairs
verbose=False, # print training progress
device=device, # device to run on
save_dir=save_dir # directory to save model checkpoints
)
# Encode data, saving the latent embedding in adata.obsm['Concord']
output_key = 'Concord-class'
with timer:
cur_ccd.encode_adata(input_layer_key='X', output_key=output_key, preprocess=False)
time_log[output_key] = timer.interval
# Save the latent embedding to a file, so that it can be loaded later
ccd.ul.save_obsm_to_hdf5(cur_ccd.adata, save_dir / f"obsm_{file_suffix}.h5")
Concord - WARNING - No input feature list provided. It is recommended to first select features using the command `concord.ul.select_features()`.
WARNING clustering 1000 points to 31 centroids: please provide at least 1209 training points
p_intra_knn: 0.5
Epoch 0 Training: 14it [00:00, 86.72it/s, loss=7.16] Epoch 1 Training: 100%|██████████| 14/14 [00:00<00:00, 89.68it/s, loss=6.15] Epoch 2 Training: 100%|██████████| 14/14 [00:00<00:00, 88.57it/s, loss=5.99] Epoch 3 Training: 100%|██████████| 14/14 [00:00<00:00, 89.72it/s, loss=5.53] Epoch 4 Training: 100%|██████████| 14/14 [00:00<00:00, 92.24it/s, loss=5.49] Epoch 5 Training: 100%|██████████| 14/14 [00:00<00:00, 81.94it/s, loss=5.31] Epoch 6 Training: 100%|██████████| 14/14 [00:00<00:00, 92.42it/s, loss=5.32] Epoch 7 Training: 100%|██████████| 14/14 [00:00<00:00, 91.86it/s, loss=5.27] Epoch 8 Training: 100%|██████████| 14/14 [00:00<00:00, 92.45it/s, loss=5.04] Epoch 9 Training: 100%|██████████| 14/14 [00:00<00:00, 95.17it/s, loss=5.11]
In [31]:
Copied!
# UMAP + plot for the 'Concord-class' latent, using the same parameters as
# the other embedding plots so the panels are directly comparable.
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
ccd.ul.run_umap(adata, source_key=output_key, result_key=f'{output_key}_UMAP', n_components=2, n_neighbors=30, min_dist=0.5, metric='euclidean', random_state=seed)
show_basis = f'{output_key}_UMAP'
ccd.pl.plot_embedding(
adata, show_basis, show_cols, figsize=(8,3), dpi=300, ncols=3, font_size=5, point_size=10, legend_loc='on data',
save_path=save_dir / f"{show_basis}_{file_suffix}.png"
)
Concord without specifying domain¶
In [7]:
Copied!
# Ablation: train Concord without any domain/batch information
# (domain_key=None — the log below shows all samples treated as one domain);
# latent stored as 'Concord_nd'. Note this run is not timed via `timer`,
# so it does not appear in time_log.
cur_ccd = ccd.Concord(adata=adata,
input_feature=None,
batch_size=64,
n_epochs=10,
domain_key=None, # key indicating batch
seed=seed, # random seed
verbose=True, # print training progress
device=device, # device to run on
save_dir=save_dir # directory to save model checkpoints
)
# Encode data, saving the latent embedding in adata.obsm['Concord']
output_key = 'Concord_nd'
cur_ccd.encode_adata(input_layer_key='X', output_key=output_key, preprocess=False)
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
cur_ccd = ccd.Concord(adata=adata,
input_feature=None,
batch_size=64,
n_epochs=10,
domain_key=None, # key indicating batch
seed=seed, # random seed
verbose=True, # print training progress
device=device, # device to run on
save_dir=save_dir # directory to save model checkpoints
)
# Encode data, saving the latent embedding in adata.obsm['Concord']
output_key = 'Concord_nd'
cur_ccd.encode_adata(input_layer_key='X', output_key=output_key, preprocess=False)
Concord - INFO - Setting sampler_knn to 20 to be 1/50 the number of cells in the dataset. You can change this value by setting sampler_knn in the configuration. Concord - WARNING - No input feature list provided. It is recommended to first select features using the command `concord.ul.select_features()`. Concord - INFO - Proceeding with all 120 features in the dataset. Concord - WARNING - domain/batch information not found, all samples will be treated as from single domain/batch. Concord - INFO - Encoder input dim: 120 Concord - INFO - Model loaded to device: cpu Concord - INFO - Total number of parameters: 19944 Concord.model.anndataset - INFO - Initialized dataset with 1000 samples. Data structure: ['input', 'domain', 'idx'] Concord.model.knn - INFO - Using euclidean distance metric. Concord.model.knn - WARNING - FAISS not found. Using sklearn for k-NN computation. Concord.model.dataloader - INFO - Number of unique_domains: 1 Concord.model.dataloader - INFO - Final p_intra_domain values: single_domain: 1.00
p_intra_knn: 0.3 Concord - INFO - Starting epoch 1/10 Concord - INFO - Processing chunk 1/1 for epoch 1 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 0 Training: 15it [00:00, 46.13it/s, loss=4.08]
Concord - INFO - Epoch 0 | Train Loss: 4.23, MSE: 0.00, CLASS: 0.00, CONTRAST: 4.23, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 2/10 Concord - INFO - Processing chunk 1/1 for epoch 2 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 1 Training: 100%|██████████| 15/15 [00:00<00:00, 214.74it/s, loss=3.91]
Concord - INFO - Epoch 1 | Train Loss: 3.93, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.93, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 3/10 Concord - INFO - Processing chunk 1/1 for epoch 3 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 2 Training: 100%|██████████| 15/15 [00:00<00:00, 298.30it/s, loss=3.76]
Concord - INFO - Epoch 2 | Train Loss: 3.84, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.84, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 4/10 Concord - INFO - Processing chunk 1/1 for epoch 4 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 3 Training: 100%|██████████| 15/15 [00:00<00:00, 221.84it/s, loss=3.75]
Concord - INFO - Epoch 3 | Train Loss: 3.76, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.76, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 5/10 Concord - INFO - Processing chunk 1/1 for epoch 5 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 4 Training: 100%|██████████| 15/15 [00:00<00:00, 293.11it/s, loss=3.67]
Concord - INFO - Epoch 4 | Train Loss: 3.71, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.71, IMPORTANCE: 0.00
Concord - INFO - Starting epoch 6/10 Concord - INFO - Processing chunk 1/1 for epoch 6 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 5 Training: 100%|██████████| 15/15 [00:00<00:00, 216.63it/s, loss=3.65]
Concord - INFO - Epoch 5 | Train Loss: 3.67, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.67, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 7/10 Concord - INFO - Processing chunk 1/1 for epoch 7 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 6 Training: 100%|██████████| 15/15 [00:00<00:00, 237.05it/s, loss=3.67]
Concord - INFO - Epoch 6 | Train Loss: 3.65, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.65, IMPORTANCE: 0.00 Concord - INFO - Starting epoch 8/10 Concord - INFO - Processing chunk 1/1 for epoch 8
Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 7 Training: 100%|██████████| 15/15 [00:00<00:00, 209.42it/s, loss=3.74]
Concord - INFO - Epoch 7 | Train Loss: 3.68, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.68, IMPORTANCE: 0.00
Concord - INFO - Starting epoch 9/10 Concord - INFO - Processing chunk 1/1 for epoch 9 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 8 Training: 100%|██████████| 15/15 [00:00<00:00, 300.22it/s, loss=3.59]
Concord - INFO - Epoch 8 | Train Loss: 3.66, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.66, IMPORTANCE: 0.00
Concord - INFO - Starting epoch 10/10 Concord - INFO - Processing chunk 1/1 for epoch 10 Concord - INFO - Number of samples in train_dataloader: 1000
Epoch 9 Training: 100%|██████████| 15/15 [00:00<00:00, 210.86it/s, loss=3.63]
Concord - INFO - Epoch 9 | Train Loss: 3.64, MSE: 0.00, CLASS: 0.00, CONTRAST: 3.64, IMPORTANCE: 0.00
Concord - INFO - Model saved to ../save/dev_simulation_trajectory-Feb16/final_model_Feb16-1615.pt Concord - INFO - Final model saved at: ../save/dev_simulation_trajectory-Feb16/final_model_Feb16-1615.pt; Configuration saved at: ../save/dev_simulation_trajectory-Feb16/config_Feb16-1615.json. Concord.model.anndataset - INFO - Initialized dataset with 1000 samples. Data structure: ['input', 'domain', 'idx'] Concord - INFO - Predicting for chunk 1/1
In [8]:
Copied!
# Build the reference embeddings for benchmarking:
# - 'Unintegrated': the raw PCA of the noisy, batch-affected data
# - 'PCA_no_noise' / 'PCA_wt_noise': PCA of the simulated ground-truth state
#   (computed on adata_state, then copied into adata so one object holds all).
adata.obsm['Unintegrated'] = adata.obsm['X_pca']
n_pc=30
ccd.ul.run_pca(adata_state, source_key='no_noise', result_key='PCA_no_noise', n_pc=n_pc, random_state=seed)
ccd.ul.run_pca(adata_state, source_key='wt_noise', result_key='PCA_wt_noise', n_pc=n_pc, random_state=seed)
# Put the PCA result in the adata object, so only one object is needed
adata.obsm['PCA_no_noise'] = adata_state.obsm['PCA_no_noise']
adata.obsm['PCA_wt_noise'] = adata_state.obsm['PCA_wt_noise']
adata.obsm
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
adata.obsm['Unintegrated'] = adata.obsm['X_pca']
n_pc=30
ccd.ul.run_pca(adata_state, source_key='no_noise', result_key='PCA_no_noise', n_pc=n_pc, random_state=seed)
ccd.ul.run_pca(adata_state, source_key='wt_noise', result_key='PCA_wt_noise', n_pc=n_pc, random_state=seed)
# Put the PCA result in the adata object, so only one object is needed
adata.obsm['PCA_no_noise'] = adata_state.obsm['PCA_no_noise']
adata.obsm['PCA_wt_noise'] = adata_state.obsm['PCA_wt_noise']
# Last expression of the cell: displays the obsm keys (see Out[8] below).
adata.obsm
Concord - INFO - PCA performed on source data with 30 components Concord - INFO - PCA embedding stored in adata.obsm['PCA_no_noise'] Concord - INFO - PCA performed on source data with 30 components Concord - INFO - PCA embedding stored in adata.obsm['PCA_wt_noise']
Out[8]:
AxisArrays with keys: Concord, Concord-class, Concord-class_PCA, Concord-class_UMAP, Concord-decoder, Concord-decoder_PCA, Concord-decoder_UMAP, Concord_PCA, Concord_UMAP, Concord_UMAP_2D, Harmony, Harmony_PCA, Harmony_UMAP, Liger, Liger_PCA, Liger_UMAP, PCA_no_noise, PCA_no_noise_UMAP, PCA_wt_noise, PCA_wt_noise_UMAP, Scanorama, Scanorama_PCA, Scanorama_UMAP, Unintegrated, Unintegrated_PCA, Unintegrated_UMAP, X_pca, X_umap, scANVI, scANVI_PCA, scANVI_UMAP, scVI, scVI_PCA, scVI_UMAP, Concord_nd
In [9]:
Copied!
# Names of all integration-method embeddings (latent_keys) and of the
# ground-truth reference embeddings (ground_keys); combined_keys drives the
# downstream UMAP/PCA computation and the comparison plots.
latent_keys = ["Unintegrated", "Scanorama", "Liger", "Harmony", "scVI","Concord_nd", "Concord", 'Concord-decoder', "Concord-class", "scANVI"]
ground_keys = ['PCA_no_noise', 'PCA_wt_noise']
combined_keys = ground_keys + latent_keys
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
latent_keys = ["Unintegrated", "Scanorama", "Liger", "Harmony", "scVI","Concord_nd", "Concord", 'Concord-decoder', "Concord-class", "scANVI"]
ground_keys = ['PCA_no_noise', 'PCA_wt_noise']
combined_keys = ground_keys + latent_keys
In [13]:
Copied!
# Run UMAP (2D) and, for non-PCA embeddings, a PCA projection for every
# embedding listed in combined_keys, so all methods can be compared in the
# same basis types.
# Fix: the existence check now runs BEFORE the progress message — previously
# "Running UMAP and PCA for X" was printed even for embeddings that were then
# skipped, which made the log misleading.
for basis in combined_keys:
    if basis not in adata.obsm:
        # Embedding was never computed; skip it explicitly instead of failing.
        print("Skipping", basis, "- not found in adata.obsm")
        continue
    print("Running UMAP and PCA for", basis)
    ccd.ul.run_umap(adata, source_key=basis, result_key=f'{basis}_UMAP', n_components=2, n_neighbors=30, min_dist=0.8, metric='euclidean', random_state=seed)
    if 'PCA' not in basis:
        # PCA rank is bounded by min(latent dimension, number of cells); use one less.
        n_pc = min(adata.obsm[basis].shape[1], adata.shape[0]) - 1
        ccd.ul.run_pca(adata, source_key=basis, result_key=f'{basis}_PCA', n_pc=n_pc, random_state=seed)
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
for basis in combined_keys:
    if basis not in adata.obsm:
        print("Skipping", basis, "- not found in adata.obsm")
        continue
    print("Running UMAP and PCA for", basis)
    ccd.ul.run_umap(adata, source_key=basis, result_key=f'{basis}_UMAP', n_components=2, n_neighbors=30, min_dist=0.8, metric='euclidean', random_state=seed)
    if 'PCA' not in basis:
        n_pc = min(adata.obsm[basis].shape[1], adata.shape[0]) - 1
        ccd.ul.run_pca(adata, source_key=basis, result_key=f'{basis}_PCA', n_pc=n_pc, random_state=seed)
Running UMAP and PCA for PCA_no_noise Concord - INFO - UMAP embedding stored in adata.obsm['PCA_no_noise_UMAP'] Running UMAP and PCA for PCA_wt_noise Concord - INFO - UMAP embedding stored in adata.obsm['PCA_wt_noise_UMAP'] Running UMAP and PCA for Unintegrated Concord - INFO - UMAP embedding stored in adata.obsm['Unintegrated_UMAP'] Concord - INFO - PCA performed on source data with 49 components Concord - INFO - PCA embedding stored in adata.obsm['Unintegrated_PCA'] Running UMAP and PCA for Scanorama Concord - INFO - UMAP embedding stored in adata.obsm['Scanorama_UMAP'] Concord - INFO - PCA performed on source data with 99 components Concord - INFO - PCA embedding stored in adata.obsm['Scanorama_PCA'] Running UMAP and PCA for Liger Concord - INFO - UMAP embedding stored in adata.obsm['Liger_UMAP'] Concord - INFO - PCA performed on source data with 29 components Concord - INFO - PCA embedding stored in adata.obsm['Liger_PCA'] Running UMAP and PCA for Harmony Concord - INFO - UMAP embedding stored in adata.obsm['Harmony_UMAP'] Concord - INFO - PCA performed on source data with 118 components Concord - INFO - PCA embedding stored in adata.obsm['Harmony_PCA'] Running UMAP and PCA for scVI Concord - INFO - UMAP embedding stored in adata.obsm['scVI_UMAP'] Concord - INFO - PCA performed on source data with 29 components Concord - INFO - PCA embedding stored in adata.obsm['scVI_PCA'] Running UMAP and PCA for Concord_nd Concord - INFO - UMAP embedding stored in adata.obsm['Concord_nd_UMAP'] Concord - INFO - PCA performed on source data with 31 components Concord - INFO - PCA embedding stored in adata.obsm['Concord_nd_PCA'] Running UMAP and PCA for Concord Concord - INFO - UMAP embedding stored in adata.obsm['Concord_UMAP'] Concord - INFO - PCA performed on source data with 31 components Concord - INFO - PCA embedding stored in adata.obsm['Concord_PCA'] Running UMAP and PCA for Concord-decoder Concord - INFO - UMAP embedding stored in adata.obsm['Concord-decoder_UMAP'] 
Concord - INFO - PCA performed on source data with 31 components Concord - INFO - PCA embedding stored in adata.obsm['Concord-decoder_PCA'] Running UMAP and PCA for Concord-class Concord - INFO - UMAP embedding stored in adata.obsm['Concord-class_UMAP'] Concord - INFO - PCA performed on source data with 31 components Concord - INFO - PCA embedding stored in adata.obsm['Concord-class_PCA'] Running UMAP and PCA for scANVI Concord - INFO - UMAP embedding stored in adata.obsm['scANVI_UMAP'] Concord - INFO - PCA performed on source data with 29 components Concord - INFO - PCA embedding stored in adata.obsm['scANVI_PCA']
In [12]:
Copied!
# Checkpoint both AnnData objects to disk with the run's timestamp suffix.
adata.write_h5ad(data_dir / f"adata_{file_suffix}.h5ad")
adata_state.write_h5ad(data_dir / f"adata_state_{file_suffix}.h5ad")
file_suffix
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
adata.write_h5ad(data_dir / f"adata_{file_suffix}.h5ad")
adata_state.write_h5ad(data_dir / f"adata_state_{file_suffix}.h5ad")
# Last expression: displays the suffix used for the saved files (see Out[12]).
file_suffix
Out[12]:
'Feb07-1613'
In [15]:
Copied!
# Inspect the full set of embeddings accumulated in adata.obsm (see Out[15]).
adata.obsm
# Duplicate of the line above produced by the notebook HTML export ("Copied!").
adata.obsm
Out[15]:
AxisArrays with keys: Concord, Concord-class, Concord-class_PCA, Concord-class_UMAP, Concord-decoder, Concord-decoder_PCA, Concord-decoder_UMAP, Concord_PCA, Concord_UMAP, Concord_UMAP_2D, Harmony, Harmony_PCA, Harmony_UMAP, Liger, Liger_PCA, Liger_UMAP, PCA_no_noise, PCA_no_noise_UMAP, PCA_wt_noise, PCA_wt_noise_UMAP, Scanorama, Scanorama_PCA, Scanorama_UMAP, Unintegrated, Unintegrated_PCA, Unintegrated_UMAP, X_pca, X_umap, scANVI, scANVI_PCA, scANVI_UMAP, scVI, scVI_PCA, scVI_UMAP, Concord_nd, Concord_nd_UMAP, Concord_nd_PCA, X_draw_graph_kk
In [14]:
Copied!
# Plot every embedding (ground truth + all integration methods) in KNN-graph
# and UMAP basis types, colored by simulation time and by batch, and save the
# panel grid as SVG.
# Fixes: removed the unused `nrows` variable (ncols == len(combined_keys), so
# the grid is always a single row); removed the dead commented-out font-path
# block; added a '_' separator in file_suffix so output names read
# "..._rasterized_True" instead of "...rasterized_True".
import matplotlib.pyplot as plt
import pandas as pd
# Set Arial as the default font, scoped to this figure via rc_context.
custom_rc = {
    'font.family': 'Arial',
}
color_bys = ['time', 'batch']
basis_types = ['KNN', 'UMAP']
#basis_types = ['PCA']
font_size = 8
point_size = 2.5
alpha = 0.8
figsize = (0.9 * len(combined_keys), 1)  # one column per embedding, single row
ncols = len(combined_keys)
pal = {'time': 'viridis', 'batch': 'Set1'}
# KNN-graph drawing parameters.
k = 15
edges_color = 'grey'
edges_width = 0
layout = 'kk'
threshold = 0.1
node_size_scale = 0.1
edge_width_scale = 0.1
rasterized = True  # rasterize points so the saved SVG stays small
with plt.rc_context(rc=custom_rc):
    ccd.pl.plot_all_embeddings(
        adata,
        combined_keys,
        color_bys=color_bys,
        basis_types=basis_types,
        pal=pal,
        k=k,
        edges_color=edges_color,
        edges_width=edges_width,
        layout=layout,
        threshold=threshold,
        node_size_scale=node_size_scale,
        edge_width_scale=edge_width_scale,
        font_size=font_size,
        point_size=point_size,
        alpha=alpha,
        rasterized=rasterized,
        figsize=figsize,
        ncols=ncols,
        seed=seed,
        leiden_key='leiden',
        save_dir=save_dir,
        file_suffix=file_suffix + f'_rasterized_{rasterized}',
        save_format='svg'
    )
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
import matplotlib.pyplot as plt
import pandas as pd
custom_rc = {
    'font.family': 'Arial',
}
color_bys = ['time', 'batch']
basis_types = ['KNN', 'UMAP']
#basis_types = ['PCA']
font_size = 8
point_size = 2.5
alpha = 0.8
figsize = (0.9 * len(combined_keys), 1)
ncols = len(combined_keys)
pal = {'time': 'viridis', 'batch': 'Set1'}
k = 15
edges_color = 'grey'
edges_width = 0
layout = 'kk'
threshold = 0.1
node_size_scale = 0.1
edge_width_scale = 0.1
rasterized = True
with plt.rc_context(rc=custom_rc):
    ccd.pl.plot_all_embeddings(
        adata,
        combined_keys,
        color_bys=color_bys,
        basis_types=basis_types,
        pal=pal,
        k=k,
        edges_color=edges_color,
        edges_width=edges_width,
        layout=layout,
        threshold=threshold,
        node_size_scale=node_size_scale,
        edge_width_scale=edge_width_scale,
        font_size=font_size,
        point_size=point_size,
        alpha=alpha,
        rasterized=rasterized,
        figsize=figsize,
        ncols=ncols,
        seed=seed,
        leiden_key='leiden',
        save_dir=save_dir,
        file_suffix=file_suffix + f'_rasterized_{rasterized}',
        save_format='svg'
    )
Concord.plotting.pl_embedding - INFO - Plotting PCA_no_noise with time in KNN Concord.plotting.pl_embedding - INFO - Plotting PCA_wt_noise with time in KNN Concord.plotting.pl_embedding - INFO - Plotting Unintegrated with time in KNN Concord.plotting.pl_embedding - INFO - Plotting Scanorama with time in KNN Concord.plotting.pl_embedding - INFO - Plotting Liger with time in KNN Concord.plotting.pl_embedding - INFO - Plotting Harmony with time in KNN Concord.plotting.pl_embedding - INFO - Plotting scVI with time in KNN Concord.plotting.pl_embedding - INFO - Plotting Concord_nd with time in KNN Concord.plotting.pl_embedding - INFO - Plotting Concord with time in KNN Concord.plotting.pl_embedding - INFO - Plotting Concord-decoder with time in KNN Concord.plotting.pl_embedding - INFO - Plotting Concord-class with time in KNN Concord.plotting.pl_embedding - INFO - Plotting scANVI with time in KNN
Concord.plotting.pl_embedding - INFO - Plotting PCA_no_noise with batch in KNN Concord.plotting.pl_embedding - INFO - Plotting PCA_wt_noise with batch in KNN Concord.plotting.pl_embedding - INFO - Plotting Unintegrated with batch in KNN Concord.plotting.pl_embedding - INFO - Plotting Scanorama with batch in KNN Concord.plotting.pl_embedding - INFO - Plotting Liger with batch in KNN Concord.plotting.pl_embedding - INFO - Plotting Harmony with batch in KNN Concord.plotting.pl_embedding - INFO - Plotting scVI with batch in KNN Concord.plotting.pl_embedding - INFO - Plotting Concord_nd with batch in KNN Concord.plotting.pl_embedding - INFO - Plotting Concord with batch in KNN Concord.plotting.pl_embedding - INFO - Plotting Concord-decoder with batch in KNN Concord.plotting.pl_embedding - INFO - Plotting Concord-class with batch in KNN Concord.plotting.pl_embedding - INFO - Plotting scANVI with batch in KNN
Concord.plotting.pl_embedding - INFO - Plotting PCA_no_noise with time in UMAP Concord.plotting.pl_embedding - INFO - Plotting PCA_wt_noise with time in UMAP Concord.plotting.pl_embedding - INFO - Plotting Unintegrated with time in UMAP Concord.plotting.pl_embedding - INFO - Plotting Scanorama with time in UMAP Concord.plotting.pl_embedding - INFO - Plotting Liger with time in UMAP Concord.plotting.pl_embedding - INFO - Plotting Harmony with time in UMAP Concord.plotting.pl_embedding - INFO - Plotting scVI with time in UMAP Concord.plotting.pl_embedding - INFO - Plotting Concord_nd with time in UMAP Concord.plotting.pl_embedding - INFO - Plotting Concord with time in UMAP Concord.plotting.pl_embedding - INFO - Plotting Concord-decoder with time in UMAP Concord.plotting.pl_embedding - INFO - Plotting Concord-class with time in UMAP Concord.plotting.pl_embedding - INFO - Plotting scANVI with time in UMAP
Concord.plotting.pl_embedding - INFO - Plotting PCA_no_noise with batch in UMAP Concord.plotting.pl_embedding - INFO - Plotting PCA_wt_noise with batch in UMAP Concord.plotting.pl_embedding - INFO - Plotting Unintegrated with batch in UMAP Concord.plotting.pl_embedding - INFO - Plotting Scanorama with batch in UMAP Concord.plotting.pl_embedding - INFO - Plotting Liger with batch in UMAP Concord.plotting.pl_embedding - INFO - Plotting Harmony with batch in UMAP Concord.plotting.pl_embedding - INFO - Plotting scVI with batch in UMAP Concord.plotting.pl_embedding - INFO - Plotting Concord_nd with batch in UMAP Concord.plotting.pl_embedding - INFO - Plotting Concord with batch in UMAP Concord.plotting.pl_embedding - INFO - Plotting Concord-decoder with batch in UMAP Concord.plotting.pl_embedding - INFO - Plotting Concord-class with batch in UMAP Concord.plotting.pl_embedding - INFO - Plotting scANVI with batch in UMAP
In [200]:
Copied!
# Copy the simulation ground-truth layers ('no_noise', 'wt_noise') from
# adata_state into adata for the genes the two objects share, then order the
# Concord latent dimensions by where their signal peaks along one batch's
# trajectory so the latent heatmaps below read left-to-right along the path.
# NOTE(review): the export stripped indentation; the lines below run at top level.
adata.layers['no_noise'] = np.zeros_like(adata.X)
adata.layers['wt_noise'] = np.zeros_like(adata.X)
# Find the indices of common genes between `adata` and `adata_state`
common_genes = adata.var_names.intersection(adata_state.var_names)
adata_indices = adata.var_names.get_indexer(common_genes)
adata_state_indices = adata_state.var_names.get_indexer(common_genes)
# Copy data from `adata_state` to `adata` for these common genes
adata.layers['no_noise'][:, adata_indices] = adata_state.layers['no_noise'][:, adata_state_indices].copy()
adata.layers['wt_noise'][:, adata_indices] = adata_state.layers['wt_noise'][:, adata_state_indices].copy()
# sort and smooth the signal along the path
# Use cells of the first batch as the path along which to sort latent dimensions.
batch_id=adata.obs['batch'].unique()[0]
batch_indices = np.where(adata.obs['batch'] == batch_id)[0]
_, _, _, feature_order = ccd.ul.sort_and_smooth_signal_along_path(adata, signal_key='Concord', path=batch_indices, sigma=2)
adata.obsm['Concord_sorted'] = adata.obsm['Concord'][:, feature_order]
_, _, _, feature_order = ccd.ul.sort_and_smooth_signal_along_path(adata, signal_key='Concord-decoder', path=batch_indices, sigma=2)
adata.obsm['Concord-decoder_sorted'] = adata.obsm['Concord-decoder'][:, feature_order]
# Duplicate of the cell above produced by the notebook HTML export ("Copied!").
adata.layers['no_noise'] = np.zeros_like(adata.X)
adata.layers['wt_noise'] = np.zeros_like(adata.X)
# Find the indices of common genes between `adata` and `adata_state`
common_genes = adata.var_names.intersection(adata_state.var_names)
adata_indices = adata.var_names.get_indexer(common_genes)
adata_state_indices = adata_state.var_names.get_indexer(common_genes)
# Copy data from `adata_state` to `adata` for these common genes
adata.layers['no_noise'][:, adata_indices] = adata_state.layers['no_noise'][:, adata_state_indices].copy()
adata.layers['wt_noise'][:, adata_indices] = adata_state.layers['wt_noise'][:, adata_state_indices].copy()
# sort and smooth the signal along the path
batch_id=adata.obs['batch'].unique()[0]
batch_indices = np.where(adata.obs['batch'] == batch_id)[0]
_, _, _, feature_order = ccd.ul.sort_and_smooth_signal_along_path(adata, signal_key='Concord', path=batch_indices, sigma=2)
adata.obsm['Concord_sorted'] = adata.obsm['Concord'][:, feature_order]
_, _, _, feature_order = ccd.ul.sort_and_smooth_signal_along_path(adata, signal_key='Concord-decoder', path=batch_indices, sigma=2)
adata.obsm['Concord-decoder_sorted'] = adata.obsm['Concord-decoder'][:, feature_order]
In [42]:
Copied!
# Plot heatmap of original data and Concord latent
# Five side-by-side heatmaps tracing the simulation pipeline: clean state,
# state+noise, the observed data (state+noise+batch), and the two Concord
# latents with dimensions pre-sorted along the trajectory (previous cell).
# The combined figure is saved to save_dir as SVG.
import matplotlib.pyplot as plt
figsize = (2.3, 1.8)
ncols = 5
title_fontsize = 9
dpi = 600
fig, axes = plt.subplots(1, ncols, figsize=(figsize[0] * ncols, figsize[1]), dpi=dpi)
ccd.pl.heatmap_with_annotations(adata, val='no_noise', obs_keys=[state_key], ax = axes[0], use_clustermap=False, yticklabels=False, cluster_cols=False, cluster_rows=False, value_annot=False, cmap='viridis', title='State', save_path=None, figsize=figsize, dpi=dpi, title_fontsize=title_fontsize)
ccd.pl.heatmap_with_annotations(adata, val='wt_noise', obs_keys=[state_key], ax = axes[1], use_clustermap=False, yticklabels=False, cluster_cols=False, cluster_rows=False, value_annot=False, cmap='viridis', title='State+noise', save_path=None, figsize=figsize, dpi=dpi, title_fontsize=title_fontsize)
ccd.pl.heatmap_with_annotations(adata, val='X', obs_keys=[state_key, batch_key], ax = axes[2], use_clustermap=False, yticklabels=False, cluster_cols=False, cluster_rows=False, value_annot=False, cmap='viridis', title='State+noise+batch', save_path=None, figsize=figsize, dpi=dpi, title_fontsize=title_fontsize)
ccd.pl.heatmap_with_annotations(adata, val='Concord_sorted', obs_keys=[state_key, batch_key], ax = axes[3], use_clustermap=False, yticklabels=False, cluster_cols=False, cluster_rows=False, value_annot=False, cmap='viridis', title='Concord latent', save_path=None, figsize=figsize, dpi=dpi, title_fontsize=title_fontsize)
ccd.pl.heatmap_with_annotations(adata, val='Concord-decoder_sorted', obs_keys=[state_key, batch_key], ax = axes[4], use_clustermap=False, yticklabels=False, cluster_cols=False, cluster_rows=False, value_annot=False, cmap='viridis', title='Concord-decoder latent', save_path=None, figsize=figsize, dpi=dpi, title_fontsize=title_fontsize)
plt.tight_layout(w_pad=0.0, h_pad=0.1)
plt.savefig(save_dir / f"all_heatmaps_{file_suffix}.svg", dpi=dpi, bbox_inches='tight')
# Plot heatmaps of the ground-truth signal, the noisy/batch-affected data,
# and the (sorted) Concord latents, side by side in one row.
import matplotlib.pyplot as plt

figsize = (2.3, 1.8)
ncols = 5
title_fontsize = 9
dpi = 600

# One entry per panel: (matrix to plot, obs annotations to show, panel title).
# Replaces five copy-pasted heatmap_with_annotations calls with a single loop.
panels = [
    ('no_noise', [state_key], 'State'),
    ('wt_noise', [state_key], 'State+noise'),
    ('X', [state_key, batch_key], 'State+noise+batch'),
    ('Concord_sorted', [state_key, batch_key], 'Concord latent'),
    ('Concord-decoder_sorted', [state_key, batch_key], 'Concord-decoder latent'),
]

fig, axes = plt.subplots(1, ncols, figsize=(figsize[0] * ncols, figsize[1]), dpi=dpi)
for ax, (val, obs_keys, title) in zip(axes, panels):
    ccd.pl.heatmap_with_annotations(
        adata, val=val, obs_keys=obs_keys, ax=ax,
        use_clustermap=False, yticklabels=False,
        cluster_cols=False, cluster_rows=False, value_annot=False,
        cmap='viridis', title=title, save_path=None,
        figsize=figsize, dpi=dpi, title_fontsize=title_fontsize,
    )
plt.tight_layout(w_pad=0.0, h_pad=0.1)
plt.savefig(save_dir / f"all_heatmaps_{file_suffix}.svg", dpi=dpi, bbox_inches='tight')
Evaluation¶
In [5]:
Copied!
# Reload a previously saved simulation run. The original used f-strings with no
# placeholders; hoist the run timestamp into a named constant instead.
run_suffix = "Nov24-2032"  # timestamp suffix of the saved run; change to reload another run
adata = sc.read(data_dir / f"adata_{run_suffix}.h5ad")
adata_state = sc.read(data_dir / f"adata_state_{run_suffix}.h5ad")
adata = sc.read(data_dir / f"adata_{run_suffix}.h5ad")
adata_state = sc.read(data_dir / f"adata_state_{run_suffix}.h5ad")
In [ ]:
Copied!
# --- Pseudotime via a minimum spanning tree (MST) of the kNN graph ---
from scipy.sparse.csgraph import minimum_spanning_tree, dijkstra
import matplotlib.pyplot as plt

# NOTE(review): 'connectivities' stores similarities, not distances, so the
# *minimum* spanning tree favours the weakest edges — confirm this is intended
# (a distance graph, or 1/connectivity, may be the intended input).
knn_graph = adata_b1.obsp['connectivities']
mst_sparse = minimum_spanning_tree(knn_graph)
mst_coo = mst_sparse.tocoo()  # COO gives (row, col, data) arrays for easy edge iteration

# Node (cell) coordinates in the chosen 2-D embedding.
embedding = adata_b1.obsm[show_basis]

# Endpoint coordinates of every MST edge, for overlay plotting.
edges_x = [[embedding[i, 0], embedding[j, 0]] for i, j in zip(mst_coo.row, mst_coo.col)]
edges_y = [[embedding[i, 1], embedding[j, 1]] for i, j in zip(mst_coo.row, mst_coo.col)]

# Scatter of the embedding coloured by true time, with the MST edges on top.
plt.figure(figsize=(10, 8))
plt.scatter(embedding[:, 0], embedding[:, 1], s=10, c=adata_b1.obs["time"], cmap="viridis")
for x, y in zip(edges_x, edges_y):
    plt.plot(x, y, color="black", linewidth=0.1)
plt.title("Minimum Spanning Tree on UMAP Embedding")
plt.xlabel("UMAP 1")
plt.ylabel("UMAP 2")
plt.colorbar(label="Pseudotime")
plt.show()

# Pseudotime of each MST node = shortest-path distance from the root along the tree.
root_node = 0  # NOTE(review): arbitrary root; verify cell 0 is a sensible trajectory start
distances, _ = dijkstra(mst_sparse, directed=False, return_predecessors=True, indices=root_node)
pseudotime = distances

# Project every cell onto its nearest MST edge and linearly interpolate the
# pseudotime of the two edge endpoints. O(cells x edges) — fine at this scale.
cell_pseudotime = np.zeros(adata_b1.n_obs)
for i, cell in enumerate(embedding):
    min_dist = float("inf")
    assigned_time = 0
    for start, end in zip(mst_coo.row, mst_coo.col):
        coord_start, coord_end = embedding[start], embedding[end]
        edge_vector = coord_end - coord_start
        cell_vector = cell - coord_start
        # Fraction along the edge of the orthogonal projection, clipped to [0, 1].
        proj_length = np.dot(cell_vector, edge_vector) / np.dot(edge_vector, edge_vector)
        proj_length = np.clip(proj_length, 0, 1)
        proj_point = coord_start + proj_length * edge_vector
        dist = np.linalg.norm(cell - proj_point)
        if dist < min_dist:
            min_dist = dist
            assigned_time = (1 - proj_length) * pseudotime[start] + proj_length * pseudotime[end]
    cell_pseudotime[i] = assigned_time

# Rescale to [0, 1] and store on the AnnData object.
cell_pseudotime = (cell_pseudotime - cell_pseudotime.min()) / (cell_pseudotime.max() - cell_pseudotime.min())
adata_b1.obs["pseudotime_mst"] = cell_pseudotime
# --- Pseudotime via a minimum spanning tree (MST) of the kNN graph ---
from scipy.sparse.csgraph import minimum_spanning_tree, dijkstra
import matplotlib.pyplot as plt

# NOTE(review): 'connectivities' stores similarities, not distances, so the
# *minimum* spanning tree favours the weakest edges — confirm this is intended
# (a distance graph, or 1/connectivity, may be the intended input).
knn_graph = adata_b1.obsp['connectivities']
mst_sparse = minimum_spanning_tree(knn_graph)
mst_coo = mst_sparse.tocoo()  # COO gives (row, col, data) arrays for easy edge iteration

# Node (cell) coordinates in the chosen 2-D embedding.
embedding = adata_b1.obsm[show_basis]

# Endpoint coordinates of every MST edge, for overlay plotting.
edges_x = [[embedding[i, 0], embedding[j, 0]] for i, j in zip(mst_coo.row, mst_coo.col)]
edges_y = [[embedding[i, 1], embedding[j, 1]] for i, j in zip(mst_coo.row, mst_coo.col)]

# Scatter of the embedding coloured by true time, with the MST edges on top.
plt.figure(figsize=(10, 8))
plt.scatter(embedding[:, 0], embedding[:, 1], s=10, c=adata_b1.obs["time"], cmap="viridis")
for x, y in zip(edges_x, edges_y):
    plt.plot(x, y, color="black", linewidth=0.1)
plt.title("Minimum Spanning Tree on UMAP Embedding")
plt.xlabel("UMAP 1")
plt.ylabel("UMAP 2")
plt.colorbar(label="Pseudotime")
plt.show()

# Pseudotime of each MST node = shortest-path distance from the root along the tree.
root_node = 0  # NOTE(review): arbitrary root; verify cell 0 is a sensible trajectory start
distances, _ = dijkstra(mst_sparse, directed=False, return_predecessors=True, indices=root_node)
pseudotime = distances

# Project every cell onto its nearest MST edge and linearly interpolate the
# pseudotime of the two edge endpoints. O(cells x edges) — fine at this scale.
cell_pseudotime = np.zeros(adata_b1.n_obs)
for i, cell in enumerate(embedding):
    min_dist = float("inf")
    assigned_time = 0
    for start, end in zip(mst_coo.row, mst_coo.col):
        coord_start, coord_end = embedding[start], embedding[end]
        edge_vector = coord_end - coord_start
        cell_vector = cell - coord_start
        # Fraction along the edge of the orthogonal projection, clipped to [0, 1].
        proj_length = np.dot(cell_vector, edge_vector) / np.dot(edge_vector, edge_vector)
        proj_length = np.clip(proj_length, 0, 1)
        proj_point = coord_start + proj_length * edge_vector
        dist = np.linalg.norm(cell - proj_point)
        if dist < min_dist:
            min_dist = dist
            assigned_time = (1 - proj_length) * pseudotime[start] + proj_length * pseudotime[end]
    cell_pseudotime[i] = assigned_time

# Rescale to [0, 1] and store on the AnnData object.
cell_pseudotime = (cell_pseudotime - cell_pseudotime.min()) / (cell_pseudotime.max() - cell_pseudotime.min())
adata_b1.obs["pseudotime_mst"] = cell_pseudotime
Scib¶
In [1]:
Copied!
# Run the scib-metrics benchmark: batch-correction vs. bio-conservation metrics
# for every embedding listed in `latent_keys`.
from scib_metrics.benchmark import Benchmarker
bm = Benchmarker(
    adata,
    batch_key=batch_key,  # batch covariate the integration should remove
    label_key=leiden_key,  # biological labels the integration should preserve
    embedding_obsm_keys=latent_keys,
    n_jobs=6,  # parallel workers for the metric computations
)
bm.benchmark()
from scib_metrics.benchmark import Benchmarker
bm = Benchmarker(
    adata,
    batch_key=batch_key,
    label_key=leiden_key,
    embedding_obsm_keys=latent_keys,
    n_jobs=6,
)
bm.benchmark()
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) Cell In[1], line 1 ----> 1 from scib_metrics.benchmark import Benchmarker 2 bm = Benchmarker( 3 adata, 4 batch_key=batch_key, (...) 7 n_jobs=6, 8 ) 9 bm.benchmark() File ~/.conda/envs/cellpath/lib/python3.9/site-packages/scib_metrics/__init__.py:4 1 import logging 2 from importlib.metadata import version ----> 4 from . import nearest_neighbors, utils 5 from ._graph_connectivity import graph_connectivity 6 from ._isolated_labels import isolated_labels File ~/.conda/envs/cellpath/lib/python3.9/site-packages/scib_metrics/nearest_neighbors/__init__.py:1 ----> 1 from ._dataclass import NeighborsResults 2 from ._jax import jax_approx_min_k 3 from ._pynndescent import pynndescent File ~/.conda/envs/cellpath/lib/python3.9/site-packages/scib_metrics/nearest_neighbors/_dataclass.py:7 5 import numpy as np 6 from scipy.sparse import coo_matrix, csr_matrix ----> 7 from umap.umap_ import fuzzy_simplicial_set 10 @dataclass 11 class NeighborsResults: 12 """Nearest neighbors results data store. 13 14 Attributes (...) 21 the self edge. 
22 """ File ~/.conda/envs/cellpath/lib/python3.9/site-packages/umap/__init__.py:2 1 from warnings import warn, catch_warnings, simplefilter ----> 2 from .umap_ import UMAP 4 try: 5 with catch_warnings(): File ~/.conda/envs/cellpath/lib/python3.9/site-packages/umap/umap_.py:29 27 from scipy.sparse import tril as sparse_tril, triu as sparse_triu 28 import scipy.sparse.csgraph ---> 29 import numba 31 import umap.distances as dist 33 import umap.sparse as sparse File ~/.conda/envs/cellpath/lib/python3.9/site-packages/numba/__init__.py:77 74 from numba.core import types, errors 76 # Re-export typeof ---> 77 from numba.misc.special import ( 78 typeof, prange, pndindex, gdb, gdb_breakpoint, gdb_init, 79 literally, literal_unroll, 80 ) 82 # Re-export error classes 83 from numba.core.errors import * File ~/.conda/envs/cellpath/lib/python3.9/site-packages/numba/misc/special.py:3 1 import numpy as np ----> 3 from numba.core.typing.typeof import typeof 4 from numba.core.typing.asnumbatype import as_numba_type 7 def pndindex(*args): File ~/.conda/envs/cellpath/lib/python3.9/site-packages/numba/core/typing/__init__.py:1 ----> 1 from .context import BaseContext, Context 2 from .templates import (signature, make_concrete_template, Signature, 3 fold_arguments) File ~/.conda/envs/cellpath/lib/python3.9/site-packages/numba/core/typing/context.py:14 12 from numba.core.typing import templates 13 from numba.core.utils import order_by_target_specificity ---> 14 from .typeof import typeof, Purpose 16 from numba.core import utils 19 class Rating(object): File ~/.conda/envs/cellpath/lib/python3.9/site-packages/numba/core/typing/typeof.py:10 7 from numpy.random.bit_generator import BitGenerator 9 from numba.core import types, utils, errors ---> 10 from numba.np import numpy_support 13 # terminal color markup 14 _termcolor = errors.termcolor() File ~/.conda/envs/cellpath/lib/python3.9/site-packages/numba/np/numpy_support.py:13 10 from numba.core.errors import TypingError 12 # re-export ---> 
13 from numba.core.cgutils import is_nonelike # noqa: F401 16 numpy_version = tuple(map(int, np.__version__.split('.')[:2])) 19 FROM_DTYPE = { 20 np.dtype('bool'): types.boolean, 21 np.dtype('int8'): types.int8, (...) 37 np.dtype(object): types.pyobject, 38 } File ~/.conda/envs/cellpath/lib/python3.9/site-packages/numba/core/cgutils.py:12 8 import functools 10 from llvmlite import ir ---> 12 from numba.core import utils, types, config, debuginfo 13 import numba.core.datamodel 16 bool_t = ir.IntType(1) File ~/.conda/envs/cellpath/lib/python3.9/site-packages/numba/core/debuginfo.py:12 10 from llvmlite import ir 11 from numba.core import cgutils, types ---> 12 from numba.core.datamodel.models import ComplexModel, UniTupleModel 13 from numba.core import config 16 @contextmanager 17 def suspend_emission(builder): File <frozen importlib._bootstrap>:1007, in _find_and_load(name, import_) File <frozen importlib._bootstrap>:986, in _find_and_load_unlocked(name, import_) File <frozen importlib._bootstrap>:680, in _load_unlocked(spec) File <frozen importlib._bootstrap_external>:846, in exec_module(self, module) File <frozen importlib._bootstrap_external>:941, in get_code(self, fullname) File <frozen importlib._bootstrap_external>:1040, in get_data(self, path) KeyboardInterrupt:
In [44]:
Copied!
# Render the scib-metrics results table and save it as a PDF.
import matplotlib.pyplot as plt

bm.plot_results_table(min_max_scale=False, show=False)
fig = plt.gcf()  # plot_results_table draws on the current figure
fig.set_size_inches(15, 6)
# Use pathlib (save_dir is a Path) for consistency with every other save in this notebook,
# instead of the original os.path.join.
fig.savefig(save_dir / f'scibmetrics_results_{file_suffix}.pdf', facecolor='white', dpi=600)
plt.show()
plt.close(fig)  # free the figure after saving

import matplotlib.pyplot as plt

bm.plot_results_table(min_max_scale=False, show=False)
fig = plt.gcf()
fig.set_size_inches(15, 6)
fig.savefig(save_dir / f'scibmetrics_results_{file_suffix}.pdf', facecolor='white', dpi=600)
plt.show()
plt.close(fig)
In [45]:
Copied!
# `pd` is used below but pandas is never imported in the setup cell — on a fresh
# kernel this cell raised NameError. Import it here.
import pandas as pd

scib_scores = bm.get_results(min_max_scale=False)
# Convert row 'Metric Type' to multi-index column, first level is 'Metric Type', second level is existing column name
metric_type = scib_scores.loc['Metric Type']
scib_scores = scib_scores.drop('Metric Type')  # Drop the last row now that it's stored in metric_type
scib_scores.columns = pd.MultiIndex.from_tuples([(metric_type[col], col) for col in scib_scores.columns])
scib_scores = ccd.ul.benchmark_stats_to_score(scib_scores, min_max_scale=False, one_minus=False, aggregate_score=False, rank=True, rank_col=('Aggregate score', 'Total'), name_exact=False)
ccd.pl.plot_benchmark_table(scib_scores, pal='PRGn', pal_agg='RdYlBu_r', cmap_method = 'minmax', save_path=save_dir / f"scib_results_{file_suffix}.pdf", figsize=(16, 6), dpi=300)

scib_scores = bm.get_results(min_max_scale=False)
metric_type = scib_scores.loc['Metric Type']
scib_scores = scib_scores.drop('Metric Type')
scib_scores.columns = pd.MultiIndex.from_tuples([(metric_type[col], col) for col in scib_scores.columns])
scib_scores = ccd.ul.benchmark_stats_to_score(scib_scores, min_max_scale=False, one_minus=False, aggregate_score=False, rank=True, rank_col=('Aggregate score', 'Total'), name_exact=False)
ccd.pl.plot_benchmark_table(scib_scores, pal='PRGn', pal_agg='RdYlBu_r', cmap_method = 'minmax', save_path=save_dir / f"scib_results_{file_suffix}.pdf", figsize=(16, 6), dpi=300)
Topology¶
Run topological analysis pipeline:
In [47]:
Copied!
# Compute persistent homology (H0-H2) for the Concord embeddings and pickle the diagrams.
homology_dimensions = [0,1,2]
concord_keys = ['Concord', 'Concord-decoder', 'Concord-class']
# The original had `diagrams = {}` commented out, so this cell raised NameError
# on a fresh kernel. Initialize only when undefined, preserving the intent of
# accumulating into a `diagrams` dict computed for other keys earlier.
try:
    diagrams
except NameError:
    diagrams = {}
for key in concord_keys:
    print(f"Computing persistent homology for {key}")
    diagrams[key] = ccd.ul.compute_persistent_homology(adata, key=key, homology_dimensions=homology_dimensions)
import pickle
with open(save_dir / f"topology_diagrams_{file_suffix}.pkl", 'wb') as f:
    pickle.dump(diagrams, f)

homology_dimensions = [0,1,2]
concord_keys = ['Concord', 'Concord-decoder', 'Concord-class']
try:
    diagrams
except NameError:
    diagrams = {}
for key in concord_keys:
    print(f"Computing persistent homology for {key}")
    diagrams[key] = ccd.ul.compute_persistent_homology(adata, key=key, homology_dimensions=homology_dimensions)
import pickle
with open(save_dir / f"topology_diagrams_{file_suffix}.pkl", 'wb') as f:
    pickle.dump(diagrams, f)
Computing persistent homology for Concord Computing persistent homology for Concord-decoder Computing persistent homology for Concord-class
In [48]:
Copied!
# Echo the pickle path (bare last expression → rich display) so it can be copied
# into a later reload cell.
save_dir / f"topology_diagrams_{file_suffix}.pkl"
save_dir / f"topology_diagrams_{file_suffix}.pkl"
Out[48]:
PosixPath('../save/dev_simulation_trajectory-Nov24/topology_diagrams_Nov24-2032.pkl')
In [6]:
Copied!
# Reload the diagrams pickled by an earlier run (the f-prefixes on these literals
# were redundant — no placeholders — and are removed).
# SECURITY: pickle.load executes arbitrary code; only load files this notebook wrote.
import pickle
with open(Path('../save/dev_simulation_trajectory-Nov24') / "topology_diagrams_Nov24-2032.pkl", 'rb') as f:
    diagrams = pickle.load(f)
import pickle
with open(Path('../save/dev_simulation_trajectory-Nov24') / "topology_diagrams_Nov24-2032.pkl", 'rb') as f:
    diagrams = pickle.load(f)
In [7]:
Copied!
# Score each embedding's topology against the expected Betti numbers of a trajectory.
topology_results = ccd.ul.benchmark_topology(diagrams, expected_betti_numbers=[0,0,0], save_dir=save_dir, file_suffix=file_suffix)
max_betti = 5  # cap on the Betti-number L1 distance so outliers don't dominate the ranking
topology_metrics = topology_results['combined_metrics'].drop(index=['PCA_no_noise', 'PCA_wt_noise'])
# Use the named constant — the original defined max_betti but clipped with a literal 5.
topology_metrics[('Betti number', 'L1 distance')] = topology_metrics[('Betti number', 'L1 distance')].clip(upper=max_betti)
agg_name1 = 'Topology'
agg_name2 = 'Score'
topology_scores = ccd.ul.benchmark_stats_to_score(topology_metrics, min_max_scale=True, one_minus=True, aggregate_score=True, aggregate_score_name1=agg_name1, aggregate_score_name2=agg_name2, rank=True, rank_col=(agg_name1,agg_name2), name_exact=False)
ccd.pl.plot_benchmark_table(topology_scores, pal='PRGn', pal_agg='RdYlBu_r', cmap_method = 'minmax', agg_name = agg_name1, save_path=save_dir / f"topology_results_{file_suffix}.pdf", figsize=(6, 6), dpi=300)

topology_results = ccd.ul.benchmark_topology(diagrams, expected_betti_numbers=[0,0,0], save_dir=save_dir, file_suffix=file_suffix)
max_betti = 5
topology_metrics = topology_results['combined_metrics'].drop(index=['PCA_no_noise', 'PCA_wt_noise'])
topology_metrics[('Betti number', 'L1 distance')] = topology_metrics[('Betti number', 'L1 distance')].clip(upper=max_betti)
agg_name1 = 'Topology'
agg_name2 = 'Score'
topology_scores = ccd.ul.benchmark_stats_to_score(topology_metrics, min_max_scale=True, one_minus=True, aggregate_score=True, aggregate_score_name1=agg_name1, aggregate_score_name2=agg_name2, rank=True, rank_col=(agg_name1,agg_name2), name_exact=False)
ccd.pl.plot_benchmark_table(topology_scores, pal='PRGn', pal_agg='RdYlBu_r', cmap_method = 'minmax', agg_name = agg_name1, save_path=save_dir / f"topology_results_{file_suffix}.pdf", figsize=(6, 6), dpi=300)
In [13]:
Copied!
# Reorder diagrams with the same order as the combined keys
diagrams_ordered = {key: diagrams[key] for key in combined_keys}
# Change the key names to remove 'PCA_'
diagrams_ordered = {key.replace('PCA_', ''): diagrams_ordered[key] for key in diagrams_ordered}
# One persistence diagram per embedding, laid out in a single row of 11 panels.
ccd.pl.plot_persistence_diagrams(diagrams_ordered, base_size=(1.3, 1.5), dpi=300, marker_size=4, n_cols=11, fontsize=10, save_path=save_dir / f"persistence_diagrams_{file_suffix}.pdf", legend=False, label_axes=False, axis_ticks=False)
# Reorder diagrams with the same order as the combined keys
diagrams_ordered = {key: diagrams[key] for key in combined_keys}
# Change the key names to remove 'PCA_'
diagrams_ordered = {key.replace('PCA_', ''): diagrams_ordered[key] for key in diagrams_ordered}
ccd.pl.plot_persistence_diagrams(diagrams_ordered, base_size=(1.3, 1.5), dpi=300, marker_size=4, n_cols=11, fontsize=10, save_path=save_dir / f"persistence_diagrams_{file_suffix}.pdf", legend=False, label_axes=False, axis_ticks=False)
In [14]:
Copied!
# Betti curves for each embedding, same ordering and layout as the persistence diagrams above.
ccd.pl.plot_betti_curves(diagrams_ordered, nbins=100, base_size=(1.3, 1.5), n_cols=11, fontsize=10, save_path=save_dir / f"betti_curves_{file_suffix}.pdf", dpi=300, legend=False, label_axes=False, axis_ticks=False)
ccd.pl.plot_betti_curves(diagrams_ordered, nbins=100, base_size=(1.3, 1.5), n_cols=11, fontsize=10, save_path=save_dir / f"betti_curves_{file_suffix}.pdf", dpi=300, legend=False, label_axes=False, axis_ticks=False)
In [8]:
Copied!
# compare connectivity for latent vs ground truth, store the result in a pandas dataframe
# NOTE(review): this cell (In[8]) uses `combined_keys`, which is defined in the
# later cell In[9] — relies on out-of-order execution; verify on a fresh kernel.
groundtruth_keys = {'(nn)': 'PCA_no_noise','(wn)': 'PCA_wt_noise'}  # nn = no noise, wn = with noise
connectivity_df = ccd.ul.benchmark_graph_connectivity(adata, emb_keys=combined_keys, groundtruth_keys=groundtruth_keys, k=30)
agg_name1 = 'Connectivity'  # first level of the aggregate-score column
agg_name2 = 'Score'  # second level of the aggregate-score column
connectivity_scores = ccd.ul.benchmark_stats_to_score(connectivity_df, min_max_scale=False, one_minus=False, aggregate_score=True, aggregate_score_name1=agg_name1, aggregate_score_name2=agg_name2, rank=True, rank_col=(agg_name1,agg_name2), name_exact=False)
ccd.pl.plot_benchmark_table(connectivity_scores, pal='PRGn', pal_agg='RdYlBu_r', cmap_method = 'minmax', agg_name = agg_name1, save_path=save_dir / f"connectivity_results_{file_suffix}.pdf", figsize=(8, 8), dpi=300)
# compare connectivity for latent vs ground truth, store the result in a pandas dataframe
groundtruth_keys = {'(nn)': 'PCA_no_noise','(wn)': 'PCA_wt_noise'}
connectivity_df = ccd.ul.benchmark_graph_connectivity(adata, emb_keys=combined_keys, groundtruth_keys=groundtruth_keys, k=30)
agg_name1 = 'Connectivity'
agg_name2 = 'Score'
connectivity_scores = ccd.ul.benchmark_stats_to_score(connectivity_df, min_max_scale=False, one_minus=False, aggregate_score=True, aggregate_score_name1=agg_name1, aggregate_score_name2=agg_name2, rank=True, rank_col=(agg_name1,agg_name2), name_exact=False)
ccd.pl.plot_benchmark_table(connectivity_scores, pal='PRGn', pal_agg='RdYlBu_r', cmap_method = 'minmax', agg_name = agg_name1, save_path=save_dir / f"connectivity_results_{file_suffix}.pdf", figsize=(8, 8), dpi=300)
Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. 
Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. 
Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. 
Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index.
Geometric Features¶
Integrated benchmark pipeline¶
In [9]:
Copied!
# Embeddings to benchmark: established integration methods plus the Concord variants,
# preceded by the two ground-truth PCA references (no-noise and with-noise).
latent_keys = ["Unintegrated", "Scanorama", "Liger", "Harmony", "scVI", "scANVI", "Concord", 'Concord-decoder', "Concord-class"]
ground_keys = ['PCA_no_noise', 'PCA_wt_noise']
combined_keys = ground_keys + latent_keys
latent_keys = ["Unintegrated", "Scanorama", "Liger", "Harmony", "scVI", "scANVI", "Concord", 'Concord-decoder', "Concord-class"]
ground_keys = ['PCA_no_noise', 'PCA_wt_noise']
combined_keys = ground_keys + latent_keys
In [10]:
Copied!
# Geometric benchmark: how well each embedding preserves distances, dispersion,
# trustworthiness, and pseudotime relative to the no-noise ground truth.
geometry_metrics = ['pseudotime', 'cell_distance_corr', 'local_distal_corr', 'trustworthiness', 'state_distance_corr', 'state_dispersion_corr', 'state_batch_distance_ratio']
dist_metric = 'cosine'
corr_types = ['pearsonr', 'spearmanr', 'kendalltau']  # report all three correlation flavors
#groundtruth_key = 'PCA_wt_noise'
groundtruth_key = 'PCA_no_noise'  # benchmark against the noise-free reference
# Convert state_dispersion to a dict of groundtruth dispersion
#groundtruth_dispersion = {'cluster_' + str(i): state_dispersion[i]**2 for i in range(5)} # convert to variance
# NOTE(review): `leiden_key` is defined outside this chunk — confirm it exists on a fresh run.
geometry_df, geometry_full = ccd.ul.benchmark_geometry(adata, keys=combined_keys, eval_metrics=geometry_metrics,
    dist_metric=dist_metric,
    corr_types = corr_types,
    groundtruth_key = groundtruth_key,
    state_key = leiden_key,
    batch_key = batch_key,
    #groundtruth_dispersion = groundtruth_dispersion,
    dispersion_metric='var',
    return_type='full',
    start_point=0,
    end_point=adata.n_obs-1,  # pseudotime path runs from the first to the last cell
    pseudotime_k = 30,
    truetime_key = 'time',
    save_dir=save_dir,
    file_suffix=file_suffix)
geometry_metrics = ['pseudotime', 'cell_distance_corr', 'local_distal_corr', 'trustworthiness', 'state_distance_corr', 'state_dispersion_corr', 'state_batch_distance_ratio']
dist_metric = 'cosine'
corr_types = ['pearsonr', 'spearmanr', 'kendalltau']
#groundtruth_key = 'PCA_wt_noise'
groundtruth_key = 'PCA_no_noise'
# Convert state_dispersion to a dict of groundtruth dispersion
#groundtruth_dispersion = {'cluster_' + str(i): state_dispersion[i]**2 for i in range(5)} # convert to variance
geometry_df, geometry_full = ccd.ul.benchmark_geometry(adata, keys=combined_keys, eval_metrics=geometry_metrics,
    dist_metric=dist_metric,
    corr_types = corr_types,
    groundtruth_key = groundtruth_key,
    state_key = leiden_key,
    batch_key = batch_key,
    #groundtruth_dispersion = groundtruth_dispersion,
    dispersion_metric='var',
    return_type='full',
    start_point=0,
    end_point=adata.n_obs-1,
    pseudotime_k = 30,
    truetime_key = 'time',
    save_dir=save_dir,
    file_suffix=file_suffix)
Concord - INFO - Computing pseudotime correlation Concord - INFO - Computing pseudotime for PCA_no_noise Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord - INFO - Finding path between point_0 and point_999 Concord - INFO - Computing pseudotime for PCA_wt_noise Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord - INFO - Finding path between point_0 and point_999 Concord - INFO - Computing pseudotime for Unintegrated Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord - INFO - Finding path between point_0 and point_999 Concord - INFO - Failed to compute shortest path for Unintegrated Concord - INFO - Computing pseudotime for Scanorama Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord - INFO - Finding path between point_0 and point_999 Concord - INFO - Computing pseudotime for Liger Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord - INFO - Finding path between point_0 and point_999 Concord - INFO - Computing pseudotime for Harmony Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord - INFO - Finding path between point_0 and point_999 Concord - INFO - Computing pseudotime for scVI Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. 
Concord - INFO - Finding path between point_0 and point_999 Concord - INFO - Computing pseudotime for scANVI Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord - INFO - Finding path between point_0 and point_999 Concord - INFO - Computing pseudotime for Concord Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord - INFO - Finding path between point_0 and point_999 Concord - INFO - Computing pseudotime for Concord-decoder Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord - INFO - Finding path between point_0 and point_999 Concord - INFO - Computing pseudotime for Concord-class Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord - INFO - Finding path between point_0 and point_999 Concord - INFO - Computing cell distance correlation Concord - INFO - Computing local vs distal correlation Concord - INFO - Computing trustworthiness Concord - INFO - Computing cluster centroid distances correlation Concord - INFO - Computing state dispersion correlation Concord - INFO - Computing state-batch distance ratio
In [11]:
Copied!
# Aggregate the raw geometry metrics into a single ranked score table.
# The two ground-truth rows are dropped so they do not distort the ranking.
agg_name1 = 'Geometry'
agg_name2 = 'Score'
geometry_scores = ccd.ul.benchmark_stats_to_score(
geometry_df.drop(index=['PCA_no_noise', 'PCA_wt_noise']), fillna = 0,
min_max_scale=False, one_minus=False, aggregate_score=True, aggregate_score_name1=agg_name1, aggregate_score_name2=agg_name2, rank=True, rank_col=(agg_name1,agg_name2))
# Render the score table as a color-coded benchmark figure and save it to PDF.
ccd.pl.plot_benchmark_table(geometry_scores, pal='PRGn', pal_agg='RdYlBu_r', cmap_method = 'minmax', agg_name = agg_name1, save_path=save_dir / f"geometry_results_noscale_{dist_metric}_{groundtruth_key}_{file_suffix}.pdf", figsize=(19, 7), dpi=300)
# Aggregate the raw geometry metrics into a single ranked score table.
# The two ground-truth rows are dropped so they do not distort the ranking.
agg_name1 = 'Geometry'
agg_name2 = 'Score'
geometry_scores = ccd.ul.benchmark_stats_to_score(
geometry_df.drop(index=['PCA_no_noise', 'PCA_wt_noise']), fillna = 0,
min_max_scale=False, one_minus=False, aggregate_score=True, aggregate_score_name1=agg_name1, aggregate_score_name2=agg_name2, rank=True, rank_col=(agg_name1,agg_name2))
# Render the score table as a color-coded benchmark figure and save it to PDF.
ccd.pl.plot_benchmark_table(geometry_scores, pal='PRGn', pal_agg='RdYlBu_r', cmap_method = 'minmax', agg_name = agg_name1, save_path=save_dir / f"geometry_results_noscale_{dist_metric}_{groundtruth_key}_{file_suffix}.pdf", figsize=(19, 7), dpi=300)
In [19]:
Copied!
# Re-compute pseudotime correlations against BOTH references (clean and noisy PCA)
# from the cached per-embedding pseudotime values in geometry_full.
pseudotime_result = geometry_full['Pseudotime']
pseudotime_result_no_noise = ccd.ul.compute_correlation(pseudotime_result['pseudotime'], corr_types=corr_types, groundtruth_key='PCA_no_noise')
pseudotime_result_wt_noise = ccd.ul.compute_correlation(pseudotime_result['pseudotime'], corr_types=corr_types, groundtruth_key='PCA_wt_noise')
# Last expression displays the clean-reference table (rendered as Out[19] below).
pseudotime_result_no_noise
# Re-compute pseudotime correlations against BOTH references (clean and noisy PCA)
# from the cached per-embedding pseudotime values in geometry_full.
pseudotime_result = geometry_full['Pseudotime']
pseudotime_result_no_noise = ccd.ul.compute_correlation(pseudotime_result['pseudotime'], corr_types=corr_types, groundtruth_key='PCA_no_noise')
pseudotime_result_wt_noise = ccd.ul.compute_correlation(pseudotime_result['pseudotime'], corr_types=corr_types, groundtruth_key='PCA_wt_noise')
pseudotime_result_no_noise
Out[19]:
| pearsonr | spearmanr | kendalltau | |
|---|---|---|---|
| PCA_no_noise | 1.000000 | 1.000000 | 1.000000 |
| PCA_wt_noise | 0.998643 | 0.998806 | 0.970763 |
| Scanorama | 0.997319 | 0.998587 | 0.967444 |
| Liger | 0.995380 | 0.996154 | 0.948276 |
| Harmony | 0.997182 | 0.998728 | 0.968596 |
| scVI | 0.997339 | 0.998775 | 0.971026 |
| scANVI | 0.998256 | 0.999170 | 0.975595 |
| Concord | 0.997259 | 0.999379 | 0.978573 |
| Concord-decoder | 0.998703 | 0.999245 | 0.977143 |
| Concord-class | 0.998540 | 0.999311 | 0.977275 |
| time | 0.999970 | 1.000000 | 0.999990 |
In [29]:
Copied!
# Scatter each embedding's inferred pseudotime against true simulation time,
# annotated with the correlation values computed by benchmark_geometry.
ccd.pl.plot_geometry_scatter(
data_dict = geometry_full['Pseudotime']['pseudotime'],
correlation= geometry_full['Pseudotime']['correlation'],
s=3,
ground_key = 'time', fontsize=9,
n_cols = 11, figsize=(1.7,2.1), dpi=300, save_path=save_dir / f"pseudotime_scatter_{groundtruth_key}_{file_suffix}.pdf")
# Scatter each embedding's inferred pseudotime against true simulation time,
# annotated with the correlation values computed by benchmark_geometry.
ccd.pl.plot_geometry_scatter(
data_dict = geometry_full['Pseudotime']['pseudotime'],
correlation= geometry_full['Pseudotime']['correlation'],
s=3,
ground_key = 'time', fontsize=9,
n_cols = 11, figsize=(1.7,2.1), dpi=300, save_path=save_dir / f"pseudotime_scatter_{groundtruth_key}_{file_suffix}.pdf")
In [12]:
Copied!
import matplotlib.pyplot as plt

# Plot each embedding's UMAP colored by the pseudotime inferred on that embedding,
# highlighting the shortest path used to order the cells.
n_cols = 11
n_rows = int(np.ceil(len(combined_keys) / n_cols))
base_size = (1.5, 1.7)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols*base_size[0], n_rows*base_size[1]), dpi=300)
axes = np.atleast_2d(axes).flatten()
# enumerate gives the panel index directly, avoiding the O(n) list.index() lookup per iteration
for ax_idx, basis in enumerate(combined_keys):
    show_basis = basis + '_UMAP'
    if show_basis not in adata.obsm or basis not in geometry_full['Pseudotime']['pseudotime']:
        # Pseudotime unavailable for this embedding (e.g. shortest path failed) -- draw an empty panel.
        show_indices = None
        adata.obs['pseudotime_plot'] = np.nan
    else:
        show_indices = geometry_full['Pseudotime']['path'][basis]
        adata.obs['pseudotime_plot'] = geometry_full['Pseudotime']['pseudotime'][basis]
    show_cols = ['pseudotime_plot']
    ccd.pl.plot_embedding(
        adata, show_basis, show_cols, highlight_indices=show_indices, highlight_size=5, draw_path=True, alpha=1.0,
        font_size=12, point_size=10, path_width=1,
        legend_loc='on data', title=basis, colorbar_loc=None, rasterized=True, xlabel=None, ylabel=None,
        ax=axes[ax_idx]
    )
plt.tight_layout()
plt.savefig(save_dir / f"pseudotime_embedding_{file_suffix}.pdf")
import matplotlib.pyplot as plt

# Plot each embedding's UMAP colored by the pseudotime inferred on that embedding,
# highlighting the shortest path used to order the cells.
n_cols = 11
n_rows = int(np.ceil(len(combined_keys) / n_cols))
base_size = (1.5, 1.7)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols*base_size[0], n_rows*base_size[1]), dpi=300)
axes = np.atleast_2d(axes).flatten()
# enumerate gives the panel index directly, avoiding the O(n) list.index() lookup per iteration
for ax_idx, basis in enumerate(combined_keys):
    show_basis = basis + '_UMAP'
    if show_basis not in adata.obsm or basis not in geometry_full['Pseudotime']['pseudotime']:
        # Pseudotime unavailable for this embedding (e.g. shortest path failed) -- draw an empty panel.
        show_indices = None
        adata.obs['pseudotime_plot'] = np.nan
    else:
        show_indices = geometry_full['Pseudotime']['path'][basis]
        adata.obs['pseudotime_plot'] = geometry_full['Pseudotime']['pseudotime'][basis]
    show_cols = ['pseudotime_plot']
    ccd.pl.plot_embedding(
        adata, show_basis, show_cols, highlight_indices=show_indices, highlight_size=5, draw_path=True, alpha=1.0,
        font_size=12, point_size=10, path_width=1,
        legend_loc='on data', title=basis, colorbar_loc=None, rasterized=True, xlabel=None, ylabel=None,
        ax=axes[ax_idx]
    )
plt.tight_layout()
plt.savefig(save_dir / f"pseudotime_embedding_{file_suffix}.pdf")
In [26]:
Copied!
# Heatmap of pairwise cell-cell distances per embedding (one panel each).
ccd.pl.plot_distance_heatmap(geometry_full['cell_distance_corr']['distance'], n_cols = 11, figsize=(1.1,1.3), cbar=False, dpi=300, save_path=save_dir / f"cell_distance_hmap_{file_suffix}.pdf")
# Heatmap of pairwise cell-cell distances per embedding (one panel each).
ccd.pl.plot_distance_heatmap(geometry_full['cell_distance_corr']['distance'], n_cols = 11, figsize=(1.1,1.3), cbar=False, dpi=300, save_path=save_dir / f"cell_distance_hmap_{file_suffix}.pdf")
In [137]:
Copied!
# Scatter of per-cell-pair distances vs. ground truth; PNG (not PDF) because the
# point cloud is large and a vector file would be huge.
ccd.pl.plot_geometry_scatter(
data_dict = geometry_full['cell_distance_corr']['distance'],
correlation= geometry_full['cell_distance_corr']['correlation'],
s=0.1, alpha = 0.2,
n_cols = 3, figsize=(2,2), dpi=300, save_path=save_dir / f"cell_distance_scatter_{file_suffix}.png")
# Scatter of per-cell-pair distances vs. ground truth; PNG (not PDF) because the
# point cloud is large and a vector file would be huge.
ccd.pl.plot_geometry_scatter(
data_dict = geometry_full['cell_distance_corr']['distance'],
correlation= geometry_full['cell_distance_corr']['correlation'],
s=0.1, alpha = 0.2,
n_cols = 3, figsize=(2,2), dpi=300, save_path=save_dir / f"cell_distance_scatter_{file_suffix}.png")
In [30]:
Copied!
# Trustworthiness curves (neighborhood preservation vs. k) for all embeddings.
trustworthiness_scores = geometry_full['trustworthiness']['scores']
ccd.pl.plot_trustworthiness(trustworthiness_scores, text_shift=2, legend=True, save_path=save_dir / f"trustworthiness_{groundtruth_key}_{file_suffix}.pdf", figsize=(4,3))
# Trustworthiness curves (neighborhood preservation vs. k) for all embeddings.
trustworthiness_scores = geometry_full['trustworthiness']['scores']
ccd.pl.plot_trustworthiness(trustworthiness_scores, text_shift=2, legend=True, save_path=save_dir / f"trustworthiness_{groundtruth_key}_{file_suffix}.pdf", figsize=(4,3))
In [139]:
Copied!
# Heatmap of state (cluster) centroid distances per embedding.
# Fixed save name: this plots STATE distances, and the old name
# "cell_distance_hmap_*" shadowed the cell-distance heatmap's filename.
ccd.pl.plot_distance_heatmap(geometry_full['state_distance_corr']['distance'],
                             n_cols = 3, annot_value=False,
                             figsize=(2,1.6), dpi=300, save_path=save_dir / f"state_distance_hmap_{file_suffix}.png")
# Heatmap of state (cluster) centroid distances per embedding.
# Fixed save name: this plots STATE distances, and the old name
# "cell_distance_hmap_*" shadowed the cell-distance heatmap's filename.
ccd.pl.plot_distance_heatmap(geometry_full['state_distance_corr']['distance'],
                             n_cols = 3, annot_value=False,
                             figsize=(2,1.6), dpi=300, save_path=save_dir / f"state_distance_hmap_{file_suffix}.png")
In [140]:
Copied!
# Scatter of state-centroid distances vs. ground truth, with correlations.
ccd.pl.plot_geometry_scatter(
data_dict = geometry_full['state_distance_corr']['distance'],
correlation= geometry_full['state_distance_corr']['correlation'],
n_cols = 3, figsize=(2,2), dpi=300, save_path=save_dir / f"state_distance_scatter_{file_suffix}.pdf")
# Scatter of state-centroid distances vs. ground truth, with correlations.
ccd.pl.plot_geometry_scatter(
data_dict = geometry_full['state_distance_corr']['distance'],
correlation= geometry_full['state_distance_corr']['correlation'],
n_cols = 3, figsize=(2,2), dpi=300, save_path=save_dir / f"state_distance_scatter_{file_suffix}.pdf")
In [141]:
Copied!
# Scatter of per-state dispersion vs. the noisy ground truth.
# Fixed save name: the old path "state_distance_scatter_{file_suffix}.pdf" was
# identical to the previous cell's output and silently overwrote it.
ccd.pl.plot_geometry_scatter(
    data_dict = geometry_full['state_dispersion_corr']['dispersion'],
    correlation= geometry_full['state_dispersion_corr']['correlation'],
    s=10,
    ground_key = 'PCA_wt_noise',
    n_cols = 3, figsize=(2,2), dpi=300, save_path=save_dir / f"state_dispersion_scatter_{file_suffix}.pdf")
# Scatter of per-state dispersion vs. the noisy ground truth.
# Fixed save name: the old path "state_distance_scatter_{file_suffix}.pdf" was
# identical to the previous cell's output and silently overwrote it.
ccd.pl.plot_geometry_scatter(
    data_dict = geometry_full['state_dispersion_corr']['dispersion'],
    correlation= geometry_full['state_dispersion_corr']['correlation'],
    s=10,
    ground_key = 'PCA_wt_noise',
    n_cols = 3, figsize=(2,2), dpi=300, save_path=save_dir / f"state_dispersion_scatter_{file_suffix}.pdf")
In [142]:
Copied!
# Bar plot of the state-vs-batch distance ratio (higher = batch better mixed
# relative to biological states); ground-truth rows excluded.
plot_df = geometry_full['state_batch_distance_ratio'].drop(index=['PCA_no_noise', 'PCA_wt_noise'])
ccd.pl.plot_bar(plot_df, 'State-Batch Distance Ratio (log10)', save_path=save_dir / f"state_batch_distance_ratio_{file_suffix}.pdf", figsize=(3,2), dpi=300)
# Bar plot of the state-vs-batch distance ratio (higher = batch better mixed
# relative to biological states); ground-truth rows excluded.
plot_df = geometry_full['state_batch_distance_ratio'].drop(index=['PCA_no_noise', 'PCA_wt_noise'])
ccd.pl.plot_bar(plot_df, 'State-Batch Distance Ratio (log10)', save_path=save_dir / f"state_batch_distance_ratio_{file_suffix}.pdf", figsize=(3,2), dpi=300)
2.0 Decoder reconstruction¶
In [143]:
Copied!
# Inspect which layers are available (decoded outputs listed in Out[143] below).
adata.layers
adata.layers
Out[143]:
Layers with keys: wt_noise, counts, scVI_corrected_batch_1, Scanorama_corrected, Liger_corrected, scANVI_corrected_batch_1, Concord-decoder_decoded_batch_1, Concord-decoder_decoded_batch_2
In [144]:
Copied!
# Align and copy the layer data based on observation names
# (assumes adata and adata_state share the same cell order -- TODO confirm;
# only genes are re-indexed here, observations are copied positionally)
adata.layers['no_noise'] = np.zeros_like(adata.X)
# Find the indices of common genes between `adata` and `adata_state`
common_genes = adata.var_names.intersection(adata_state.var_names)
adata_indices = adata.var_names.get_indexer(common_genes)
adata_state_indices = adata_state.var_names.get_indexer(common_genes)
# Copy data from `adata_state` to `adata` for these common genes
adata.layers['no_noise'][:, adata_indices] = adata_state.layers['no_noise'][:, adata_state_indices].copy()
# Heatmap comparing the clean, noisy, and decoder-reconstructed expression layers.
decoded_layers = ['Concord-decoder_decoded_batch_1', 'Concord-decoder_decoded_batch_2']
show_layers = ['no_noise', 'wt_noise'] + decoded_layers
ccd.pl.plot_adata_layer_heatmaps(adata, ncells=None, ngenes=None, layers=show_layers, cmap='viridis', vmin=0, vmax=7,
obs_keys=[state_key, batch_key], transpose=False, figsize=(6,6), dpi=300, save_path=save_dir/f'decoded_heatmap_{file_suffix}.png')
# Align and copy the layer data based on observation names
# (assumes adata and adata_state share the same cell order -- TODO confirm;
# only genes are re-indexed here, observations are copied positionally)
adata.layers['no_noise'] = np.zeros_like(adata.X)
# Find the indices of common genes between `adata` and `adata_state`
common_genes = adata.var_names.intersection(adata_state.var_names)
adata_indices = adata.var_names.get_indexer(common_genes)
adata_state_indices = adata_state.var_names.get_indexer(common_genes)
# Copy data from `adata_state` to `adata` for these common genes
adata.layers['no_noise'][:, adata_indices] = adata_state.layers['no_noise'][:, adata_state_indices].copy()
# Heatmap comparing the clean, noisy, and decoder-reconstructed expression layers.
decoded_layers = ['Concord-decoder_decoded_batch_1', 'Concord-decoder_decoded_batch_2']
show_layers = ['no_noise', 'wt_noise'] + decoded_layers
ccd.pl.plot_adata_layer_heatmaps(adata, ncells=None, ngenes=None, layers=show_layers, cmap='viridis', vmin=0, vmax=7,
obs_keys=[state_key, batch_key], transpose=False, figsize=(6,6), dpi=300, save_path=save_dir/f'decoded_heatmap_{file_suffix}.png')
In [145]:
Copied!
# Compute the reconstruction error (MSE) of each decoded layer against the
# clean ('no_noise') and noisy ('wt_noise') references, restricted to genes
# shared with the ground-truth data so the layers are comparable.
mse_no_noise = np.zeros(len(decoded_layers))
mse_wt_noise = np.zeros(len(decoded_layers))
state_genes = adata.var_names[adata.var_names.isin(adata_state.var_names)]
# enumerate avoids the repeated O(n) list.index() lookup of the original loop
for i, layer in enumerate(decoded_layers):
    mse_no_noise[i] = ccd.ul.compute_reconstruction_error(adata[:,state_genes], 'no_noise', layer, metric='mse')
    mse_wt_noise[i] = ccd.ul.compute_reconstruction_error(adata[:,state_genes], 'wt_noise', layer, metric='mse')
# Report value, mean
print(f"MSE between no_noise and decoded layers: {mse_no_noise}")
print(f"MSE between wt_noise and decoded layers: {mse_wt_noise}")
print(f"Mean MSE between no_noise and decoded layers: {np.mean(mse_no_noise):.4f}")
print(f"Mean MSE between wt_noise and decoded layers: {np.mean(mse_wt_noise):.4f}")
# Compute the reconstruction error (MSE) of each decoded layer against the
# clean ('no_noise') and noisy ('wt_noise') references, restricted to genes
# shared with the ground-truth data so the layers are comparable.
mse_no_noise = np.zeros(len(decoded_layers))
mse_wt_noise = np.zeros(len(decoded_layers))
state_genes = adata.var_names[adata.var_names.isin(adata_state.var_names)]
# enumerate avoids the repeated O(n) list.index() lookup of the original loop
for i, layer in enumerate(decoded_layers):
    mse_no_noise[i] = ccd.ul.compute_reconstruction_error(adata[:,state_genes], 'no_noise', layer, metric='mse')
    mse_wt_noise[i] = ccd.ul.compute_reconstruction_error(adata[:,state_genes], 'wt_noise', layer, metric='mse')
# Report value, mean
print(f"MSE between no_noise and decoded layers: {mse_no_noise}")
print(f"MSE between wt_noise and decoded layers: {mse_wt_noise}")
print(f"Mean MSE between no_noise and decoded layers: {np.mean(mse_no_noise):.4f}")
print(f"Mean MSE between wt_noise and decoded layers: {np.mean(mse_wt_noise):.4f}")
MSE between no_noise and decoded layers: [0.35097491 0.32146425] MSE between wt_noise and decoded layers: [2.47886576 2.47109245] Mean MSE between no_noise and decoded layers: 0.3362 Mean MSE between wt_noise and decoded layers: 2.4750
Curvature analysis¶
In [146]:
Copied!
# Trace a kNN-graph shortest path through the Concord latent space between
# cells 0 and 100, for the curvature analysis below.
basis = 'Concord'
pseudotime_k = 30
start_point = 0
end_point = 100
neighborhood = ccd.ml.Neighborhood(adata.obsm[basis], k=pseudotime_k, use_faiss=True)
path, _ = ccd.ul.shortest_path_on_knn_graph(neighborhood, k=pseudotime_k, point_a=start_point, point_b=end_point, use_faiss=True)
# Trace a kNN-graph shortest path through the Concord latent space between
# cells 0 and 100, for the curvature analysis below.
basis = 'Concord'
pseudotime_k = 30
start_point = 0
end_point = 100
neighborhood = ccd.ml.Neighborhood(adata.obsm[basis], k=pseudotime_k, use_faiss=True)
path, _ = ccd.ul.shortest_path_on_knn_graph(neighborhood, k=pseudotime_k, point_a=start_point, point_b=end_point, use_faiss=True)
Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. Computing now. Concord - INFO - Finding path between point_0 and point_100
In [147]:
Copied!
# Visualize the shortest path on the Concord PCA embedding, colored by true time.
# Fixed save name: the old path "pseudotime_embedding_{file_suffix}.pdf" was
# identical to the multi-panel figure saved earlier and overwrote it.
show_cols = ['time']
show_indices = path
show_basis = basis + '_PCA'
ccd.pl.plot_embedding(
    adata, show_basis, show_cols, highlight_indices=show_indices, highlight_size=5, draw_path=True, alpha=1.0,
    font_size=12, point_size=10, path_width=1,
    legend_loc='on data', title=basis, colorbar_loc=None, rasterized=True, xlabel=None, ylabel=None,
    figsize=(6, 6), dpi=300, save_path=save_dir / f"path_embedding_{basis}_{file_suffix}.pdf"
)
# Visualize the shortest path on the Concord PCA embedding, colored by true time.
# Fixed save name: the old path "pseudotime_embedding_{file_suffix}.pdf" was
# identical to the multi-panel figure saved earlier and overwrote it.
show_cols = ['time']
show_indices = path
show_basis = basis + '_PCA'
ccd.pl.plot_embedding(
    adata, show_basis, show_cols, highlight_indices=show_indices, highlight_size=5, draw_path=True, alpha=1.0,
    font_size=12, point_size=10, path_width=1,
    legend_loc='on data', title=basis, colorbar_loc=None, rasterized=True, xlabel=None, ylabel=None,
    figsize=(6, 6), dpi=300, save_path=save_dir / f"path_embedding_{basis}_{file_suffix}.pdf"
)
In [110]:
Copied!
# Alias the ground-truth PCA coordinates under '_PCA' keys so the curvature
# loop below can address them with the same naming scheme as the embeddings.
adata.obsm['PCA_no_noise_PCA'] = adata_state.obsm['PCA_no_noise']
adata.obsm['PCA_wt_noise_PCA'] = adata_state.obsm['PCA_wt_noise']
adata.obsm['PCA_no_noise_PCA'] = adata_state.obsm['PCA_no_noise']
adata.obsm['PCA_wt_noise_PCA'] = adata_state.obsm['PCA_wt_noise']
In [138]:
Copied!
# Compute trajectory curvature over time for every embedding; embeddings whose
# kNN shortest path cannot be traced (e.g. Unintegrated) are skipped.
curvature_res = {}
for basis in combined_keys:
    print(f"Computing curvature for {basis}")
    try:
        curvature_res[basis]=ccd.ul.curvatures_across_time(adata, basis=basis, k=10, time_key='time', time_interval_frac=0.05)
    except Exception as e:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are not
        # swallowed, and the failure cause is reported instead of hidden.
        print(f"Error computing curvature for {basis}: {e}")
        continue
# Compute trajectory curvature over time for every embedding; embeddings whose
# kNN shortest path cannot be traced (e.g. Unintegrated) are skipped.
curvature_res = {}
for basis in combined_keys:
    print(f"Computing curvature for {basis}")
    try:
        curvature_res[basis]=ccd.ul.curvatures_across_time(adata, basis=basis, k=10, time_key='time', time_interval_frac=0.05)
    except Exception as e:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are not
        # swallowed, and the failure cause is reported instead of hidden.
        print(f"Error computing curvature for {basis}: {e}")
        continue
Computing curvature for PCA_no_noise Concord - INFO - Computing curvatures at time points: [ 0. 49.9 99.8 149.7 199.6 249.5 299.4 349.3 399.2 449.1 499. 548.9 598.8 648.7 698.6 748.5 798.4 848.3 898.2 948.1] Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. Computing now. Concord - INFO - Finding path between point_0 and point_24 Concord - INFO - Finding path between point_24 and point_57 Concord - INFO - Finding path between point_57 and point_570 Concord - INFO - Finding path between point_570 and point_102 Concord - INFO - Finding path between point_102 and point_127 Concord - INFO - Finding path between point_127 and point_152 Concord - INFO - Finding path between point_152 and point_667 Concord - INFO - Finding path between point_667 and point_696 Concord - INFO - Finding path between point_696 and point_235 Concord - INFO - Finding path between point_235 and point_259 Concord - INFO - Finding path between point_259 and point_292 Concord - INFO - Finding path between point_292 and point_320 Concord - INFO - Finding path between point_320 and point_342 Concord - INFO - Finding path between point_342 and point_855 Concord - INFO - Finding path between point_855 and point_388 Concord - INFO - Finding path between point_388 and point_406 Concord - INFO - Finding path between point_406 and point_926 Concord - INFO - Finding path between point_926 and point_950 Concord - INFO - Finding path between point_950 and point_478 Concord - INFO - Finding path between point_478 and point_499 Computing curvature for PCA_wt_noise Concord - INFO - Computing curvatures at time points: [ 0. 49.9 99.8 149.7 199.6 249.5 299.4 349.3 399.2 449.1 499. 548.9 598.8 648.7 698.6 748.5 798.4 848.3 898.2 948.1] Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. 
Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. Computing now. Concord - INFO - Finding path between point_0 and point_24 Concord - INFO - Finding path between point_24 and point_57 Concord - INFO - Finding path between point_57 and point_570 Concord - INFO - Finding path between point_570 and point_102 Concord - INFO - Finding path between point_102 and point_127 Concord - INFO - Finding path between point_127 and point_152 Concord - INFO - Finding path between point_152 and point_667 Concord - INFO - Finding path between point_667 and point_696 Concord - INFO - Finding path between point_696 and point_235 Concord - INFO - Finding path between point_235 and point_259 Concord - INFO - Finding path between point_259 and point_292 Concord - INFO - Finding path between point_292 and point_320 Concord - INFO - Finding path between point_320 and point_342 Concord - INFO - Finding path between point_342 and point_855 Concord - INFO - Finding path between point_855 and point_388 Concord - INFO - Finding path between point_388 and point_406 Concord - INFO - Finding path between point_406 and point_926 Concord - INFO - Finding path between point_926 and point_950 Concord - INFO - Finding path between point_950 and point_478 Concord - INFO - Finding path between point_478 and point_499 Computing curvature for Unintegrated Concord - INFO - Computing curvatures at time points: [ 0. 49.9 99.8 149.7 199.6 249.5 299.4 349.3 399.2 449.1 499. 548.9 598.8 648.7 698.6 748.5 798.4 848.3 898.2 948.1] Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. Computing now. 
Concord - INFO - Finding path between point_0 and point_24 Concord - INFO - Finding path between point_24 and point_57 Concord - INFO - Finding path between point_57 and point_570 Error computing curvature for Unintegrated Computing curvature for Scanorama Concord - INFO - Computing curvatures at time points: [ 0. 49.9 99.8 149.7 199.6 249.5 299.4 349.3 399.2 449.1 499. 548.9 598.8 648.7 698.6 748.5 798.4 848.3 898.2 948.1] Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. Computing now. Concord - INFO - Finding path between point_0 and point_24 Concord - INFO - Finding path between point_24 and point_57 Concord - INFO - Finding path between point_57 and point_570 Concord - INFO - Finding path between point_570 and point_102 Concord - INFO - Finding path between point_102 and point_127 Concord - INFO - Finding path between point_127 and point_152 Concord - INFO - Finding path between point_152 and point_667 Concord - INFO - Finding path between point_667 and point_696 Concord - INFO - Finding path between point_696 and point_235 Concord - INFO - Finding path between point_235 and point_259 Concord - INFO - Finding path between point_259 and point_292 Concord - INFO - Finding path between point_292 and point_320 Concord - INFO - Finding path between point_320 and point_342 Concord - INFO - Finding path between point_342 and point_855 Concord - INFO - Finding path between point_855 and point_388 Concord - INFO - Finding path between point_388 and point_406 Concord - INFO - Finding path between point_406 and point_926 Concord - INFO - Finding path between point_926 and point_950 Concord - INFO - Finding path between point_950 and point_478 Concord - INFO - Finding path between point_478 and point_499 Computing curvature for Liger Concord - INFO - Computing curvatures at time points: [ 0. 
49.9 99.8 149.7 199.6 249.5 299.4 349.3 399.2 449.1 499. 548.9 598.8 648.7 698.6 748.5 798.4 848.3 898.2 948.1] Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. Computing now. Concord - INFO - Finding path between point_0 and point_24 Concord - INFO - Finding path between point_24 and point_57 Concord - INFO - Finding path between point_57 and point_570 Concord - INFO - Finding path between point_570 and point_102 Concord - INFO - Finding path between point_102 and point_127 Concord - INFO - Finding path between point_127 and point_152 Concord - INFO - Finding path between point_152 and point_667 Concord - INFO - Finding path between point_667 and point_696 Concord - INFO - Finding path between point_696 and point_235 Concord - INFO - Finding path between point_235 and point_259 Concord - INFO - Finding path between point_259 and point_292 Concord - INFO - Finding path between point_292 and point_320 Concord - INFO - Finding path between point_320 and point_342 Concord - INFO - Finding path between point_342 and point_855 Concord - INFO - Finding path between point_855 and point_388 Concord - INFO - Finding path between point_388 and point_406 Concord - INFO - Finding path between point_406 and point_926 Concord - INFO - Finding path between point_926 and point_950 Concord - INFO - Finding path between point_950 and point_478 Concord - INFO - Finding path between point_478 and point_499 Computing curvature for Harmony Concord - INFO - Computing curvatures at time points: [ 0. 49.9 99.8 149.7 199.6 249.5 299.4 349.3 399.2 449.1 499. 548.9 598.8 648.7 698.6 748.5 798.4 848.3 898.2 948.1] Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. 
Computing now. Concord - INFO - Finding path between point_0 and point_24 Concord - INFO - Finding path between point_24 and point_57 Concord - INFO - Finding path between point_57 and point_570 Concord - INFO - Finding path between point_570 and point_102 Concord - INFO - Finding path between point_102 and point_127 Concord - INFO - Finding path between point_127 and point_152 Concord - INFO - Finding path between point_152 and point_667 Concord - INFO - Finding path between point_667 and point_696 Concord - INFO - Finding path between point_696 and point_235 Concord - INFO - Finding path between point_235 and point_259 Concord - INFO - Finding path between point_259 and point_292 Concord - INFO - Finding path between point_292 and point_320 Concord - INFO - Finding path between point_320 and point_342 Concord - INFO - Finding path between point_342 and point_855 Concord - INFO - Finding path between point_855 and point_388 Concord - INFO - Finding path between point_388 and point_406 Concord - INFO - Finding path between point_406 and point_926 Concord - INFO - Finding path between point_926 and point_950 Concord - INFO - Finding path between point_950 and point_478 Concord - INFO - Finding path between point_478 and point_499 Computing curvature for scVI Concord - INFO - Computing curvatures at time points: [ 0. 49.9 99.8 149.7 199.6 249.5 299.4 349.3 399.2 449.1 499. 548.9 598.8 648.7 698.6 748.5 798.4 848.3 898.2 948.1] Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. Computing now. 
Concord - INFO - Finding path between point_0 and point_24 Concord - INFO - Finding path between point_24 and point_57 Concord - INFO - Finding path between point_57 and point_570 Concord - INFO - Finding path between point_570 and point_102 Concord - INFO - Finding path between point_102 and point_127 Concord - INFO - Finding path between point_127 and point_152 Concord - INFO - Finding path between point_152 and point_667 Concord - INFO - Finding path between point_667 and point_696 Concord - INFO - Finding path between point_696 and point_235 Concord - INFO - Finding path between point_235 and point_259 Concord - INFO - Finding path between point_259 and point_292 Concord - INFO - Finding path between point_292 and point_320 Concord - INFO - Finding path between point_320 and point_342 Concord - INFO - Finding path between point_342 and point_855 Concord - INFO - Finding path between point_855 and point_388 Concord - INFO - Finding path between point_388 and point_406 Concord - INFO - Finding path between point_406 and point_926 Concord - INFO - Finding path between point_926 and point_950 Concord - INFO - Finding path between point_950 and point_478 Concord - INFO - Finding path between point_478 and point_499 Computing curvature for scANVI Concord - INFO - Computing curvatures at time points: [ 0. 49.9 99.8 149.7 199.6 249.5 299.4 349.3 399.2 449.1 499. 548.9 598.8 648.7 698.6 748.5 798.4 848.3 898.2 948.1] Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. Computing now. 
Concord - INFO - Finding path between point_0 and point_24 Concord - INFO - Finding path between point_24 and point_57 Concord - INFO - Finding path between point_57 and point_570 Concord - INFO - Finding path between point_570 and point_102 Concord - INFO - Finding path between point_102 and point_127 Concord - INFO - Finding path between point_127 and point_152 Concord - INFO - Finding path between point_152 and point_667 Concord - INFO - Finding path between point_667 and point_696 Concord - INFO - Finding path between point_696 and point_235 Concord - INFO - Finding path between point_235 and point_259 Concord - INFO - Finding path between point_259 and point_292 Concord - INFO - Finding path between point_292 and point_320 Concord - INFO - Finding path between point_320 and point_342 Concord - INFO - Finding path between point_342 and point_855 Concord - INFO - Finding path between point_855 and point_388 Concord - INFO - Finding path between point_388 and point_406 Concord - INFO - Finding path between point_406 and point_926 Concord - INFO - Finding path between point_926 and point_950 Concord - INFO - Finding path between point_950 and point_478 Concord - INFO - Finding path between point_478 and point_499 Computing curvature for Concord Concord - INFO - Computing curvatures at time points: [ 0. 49.9 99.8 149.7 199.6 249.5 299.4 349.3 399.2 449.1 499. 548.9 598.8 648.7 698.6 748.5 798.4 848.3 898.2 948.1] Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. Computing now. 
Concord - INFO - Finding path between point_0 and point_24 Concord - INFO - Finding path between point_24 and point_57 Concord - INFO - Finding path between point_57 and point_570 Concord - INFO - Finding path between point_570 and point_102 Concord - INFO - Finding path between point_102 and point_127 Concord - INFO - Finding path between point_127 and point_152 Concord - INFO - Finding path between point_152 and point_667 Concord - INFO - Finding path between point_667 and point_696 Concord - INFO - Finding path between point_696 and point_235 Concord - INFO - Finding path between point_235 and point_259 Concord - INFO - Finding path between point_259 and point_292 Concord - INFO - Finding path between point_292 and point_320 Concord - INFO - Finding path between point_320 and point_342 Concord - INFO - Finding path between point_342 and point_855 Concord - INFO - Finding path between point_855 and point_388 Concord - INFO - Finding path between point_388 and point_406 Concord - INFO - Finding path between point_406 and point_926 Concord - INFO - Finding path between point_926 and point_950 Concord - INFO - Finding path between point_950 and point_478 Concord - INFO - Finding path between point_478 and point_499 Computing curvature for Concord-decoder Concord - INFO - Computing curvatures at time points: [ 0. 49.9 99.8 149.7 199.6 249.5 299.4 349.3 399.2 449.1 499. 548.9 598.8 648.7 698.6 748.5 798.4 848.3 898.2 948.1] Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. Computing now. 
Concord - INFO - Finding path between point_0 and point_24 Concord - INFO - Finding path between point_24 and point_57 Concord - INFO - Finding path between point_57 and point_570 Concord - INFO - Finding path between point_570 and point_102 Concord - INFO - Finding path between point_102 and point_127 Concord - INFO - Finding path between point_127 and point_152 Concord - INFO - Finding path between point_152 and point_667 Concord - INFO - Finding path between point_667 and point_696 Concord - INFO - Finding path between point_696 and point_235 Concord - INFO - Finding path between point_235 and point_259 Concord - INFO - Finding path between point_259 and point_292 Concord - INFO - Finding path between point_292 and point_320 Concord - INFO - Finding path between point_320 and point_342 Concord - INFO - Finding path between point_342 and point_855 Concord - INFO - Finding path between point_855 and point_388 Concord - INFO - Finding path between point_388 and point_406 Concord - INFO - Finding path between point_406 and point_926 Concord - INFO - Finding path between point_926 and point_950 Concord - INFO - Finding path between point_950 and point_478 Concord - INFO - Finding path between point_478 and point_499 Computing curvature for Concord-class Concord - INFO - Computing curvatures at time points: [ 0. 49.9 99.8 149.7 199.6 249.5 299.4 349.3 399.2 449.1 499. 548.9 598.8 648.7 698.6 748.5 798.4 848.3 898.2 948.1] Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. Computing now. 
Concord - INFO - Finding path between point_0 and point_24 Concord - INFO - Finding path between point_24 and point_57 Concord - INFO - Finding path between point_57 and point_570 Concord - INFO - Finding path between point_570 and point_102 Concord - INFO - Finding path between point_102 and point_127 Concord - INFO - Finding path between point_127 and point_152 Concord - INFO - Finding path between point_152 and point_667 Concord - INFO - Finding path between point_667 and point_696 Concord - INFO - Finding path between point_696 and point_235 Concord - INFO - Finding path between point_235 and point_259 Concord - INFO - Finding path between point_259 and point_292 Concord - INFO - Finding path between point_292 and point_320 Concord - INFO - Finding path between point_320 and point_342 Concord - INFO - Finding path between point_342 and point_855 Concord - INFO - Finding path between point_855 and point_388 Concord - INFO - Finding path between point_388 and point_406 Concord - INFO - Finding path between point_406 and point_926 Concord - INFO - Finding path between point_926 and point_950 Concord - INFO - Finding path between point_950 and point_478 Concord - INFO - Finding path between point_478 and point_499
In [142]:
Copied!
# Plot curvature as a function of time for each latent embedding in
# curvature_res, overlaying the ground-truth basis on every panel.
import matplotlib.pyplot as plt

# Min-max normalize each embedding's curvature so curves share a [0, 1] scale.
for basis in curvature_res:
    vals = curvature_res[basis]['curvature']
    span = vals.max() - vals.min()
    # Guard against constant curvature (zero span) to avoid division by zero.
    curvature_res[basis]['curvature_minmax'] = (vals - vals.min()) / span if span else vals - vals.min()

nrows = len(curvature_res)
ncols = 1
base_size = (6, 3)  # (width, height) per panel, in inches
ground_basis = 'PCA_no_noise'
fig, ax = plt.subplots(nrows, ncols, figsize=(ncols * base_size[0], nrows * base_size[1]), dpi=300)
axes = np.atleast_1d(ax)  # plt.subplots returns a bare Axes when nrows == 1
for i, basis in enumerate(curvature_res):
    vals = curvature_res[basis]['curvature_minmax']
    axes[i].plot(curvature_res[basis]['mid_time'], vals, label=basis)
    # Overlay the ground truth, unless this panel already shows it.
    if basis != ground_basis:
        vals_ground = curvature_res[ground_basis]['curvature_minmax']
        axes[i].plot(curvature_res[ground_basis]['mid_time'], vals_ground, label=ground_basis)
    axes[i].set_xlabel('Time')
    axes[i].set_ylabel('Curvature')
    axes[i].legend()
plt.savefig(save_dir / f"curvature_time_{file_suffix}.pdf")
# --- Duplicate render of the previous cell (notebook-to-markdown export artifact). ---
# Min-max normalize each embedding's curvature, then plot per-basis curves
# against time with the ground-truth basis overlaid.
# Plot curvature as a function of time for each latent embedding in curvature_res, color by basis
import matplotlib.pyplot as plt
for basis in curvature_res:
vals = curvature_res[basis]['curvature']
# NOTE(review): divides by (max - min); a constant-curvature basis divides by zero.
curvature_res[basis]['curvature_minmax'] = (vals - vals.min()) / (vals.max() - vals.min())
# Plot curvature as a function of time for each latent embedding in curvature_res, color by basis
nrows = len(curvature_res)
ncols = 1
base_size=(6, 3)
# Ground-truth reference embedding, overlaid on every panel.
ground_basis = 'PCA_no_noise'
fig, ax = plt.subplots(nrows, ncols, figsize=(ncols*base_size[0], nrows*base_size[1]), dpi=300)
for i, basis in enumerate(curvature_res):
vals = curvature_res[basis]['curvature_minmax']
vals_ground = curvature_res[ground_basis]['curvature_minmax']
ax[i].plot(curvature_res[basis]['mid_time'], vals, label=basis)
# NOTE(review): in the ground_basis panel this draws the same curve twice.
ax[i].plot(curvature_res[ground_basis]['mid_time'], vals_ground, label=ground_basis)
ax[i].set_xlabel('Time')
ax[i].set_ylabel('Curvature')
ax[i].legend()
plt.savefig(save_dir / f"curvature_time_{file_suffix}.pdf")
In [143]:
Copied!
# Correlate each embedding's curvature profile with the ground-truth profile
# (corr_types and ground_basis are defined in earlier cells).
curvature_list = {name: res['curvature'] for name, res in curvature_res.items()}
corr_curvature = ccd.ul.compute_correlation(
    curvature_list,
    corr_types=corr_types,
    groundtruth_key=ground_basis,
)
corr_curvature  # last expression: rich display of the correlation table
# --- Duplicate render of the previous cell (notebook-to-markdown export artifact). ---
# Compute correlation between curvature and groundtruth curvature
curvature_list = {
basis: curvature_res[basis]['curvature'] for basis in curvature_res
}
# corr_types and ground_basis come from earlier cells (outside this excerpt).
corr_curvature = ccd.ul.compute_correlation(curvature_list, corr_types=corr_types, groundtruth_key=ground_basis)
corr_curvature
Out[143]:
| pearsonr | spearmanr | kendalltau | |
|---|---|---|---|
| PCA_no_noise | 1.000000 | 1.000000 | 1.000000 |
| PCA_wt_noise | -0.529972 | -0.477651 | -0.337136 |
| Scanorama | -0.661655 | -0.623979 | -0.478515 |
| Liger | -0.162326 | -0.097193 | -0.093179 |
| Harmony | -0.485048 | -0.527691 | -0.380637 |
| scVI | -0.179533 | -0.045491 | -0.054377 |
| scANVI | -0.184196 | -0.385154 | -0.250133 |
| Concord | -0.039616 | 0.031843 | 0.043501 |
| Concord-decoder | -0.202897 | -0.113726 | -0.065252 |
| Concord-class | -0.087593 | 0.025020 | 0.043501 |
In [126]:
Copied!
# NOTE(review): relies on `basis` leaking from an earlier cell's loop — fragile
# under Restart & Run All. Second line is a duplicate from the export.
len(curvature_res[basis])
len(curvature_res[basis])
Out[126]:
10
In [144]:
Copied!
# Project each path segment's curvature back onto the cells it spans
# (positional range [start_point, end_point]), one obs column per basis.
for basis in curvature_res:
    col = f'curvature_{basis}'
    # Initialize the column with NaN. Use np.nan: the np.NaN alias was
    # removed in NumPy 2.0 and raises AttributeError there.
    adata.obs[col] = np.nan
    # Hoist the column lookup out of the inner loop.
    col_idx = adata.obs.columns.get_loc(col)
    # Assign curvature values to cells between start and end points.
    for i in range(len(curvature_res[basis])):
        # Extract scalar positional indices for the segment endpoints.
        start_point = int(curvature_res[basis]['start_point'].iloc[i])
        end_point = int(curvature_res[basis]['end_point'].iloc[i])
        # Position-based assignment; end_point is inclusive, hence the +1.
        adata.obs.iloc[start_point:end_point + 1, col_idx] = curvature_res[basis]['curvature'].iloc[i]
# --- Duplicate render of the previous cell (notebook-to-markdown export artifact). ---
for basis in curvature_res:
# Initialize the column with NaN
# NOTE(review): np.NaN was removed in NumPy 2.0 — np.nan is the portable spelling.
adata.obs[f'curvature_{basis}'] = np.NaN
# Assign curvature values to cells between start and end points
for i in range(len(curvature_res[basis])):
# Extract scalar positional indices
start_point = int(curvature_res[basis]['start_point'].iloc[i])
end_point = int(curvature_res[basis]['end_point'].iloc[i])
# Assign curvature values using `.iloc` for position-based slicing
adata.obs.iloc[start_point:end_point + 1, adata.obs.columns.get_loc(f'curvature_{basis}')] = curvature_res[basis]['curvature'].iloc[i]
In [145]:
Copied!
# Visualize per-cell curvature on the PCA embedding of the noise-free data.
basis = 'PCA_no_noise'
show_basis = basis + '_PCA'           # embedding key to plot on
show_cols = ['curvature_' + basis]    # obs column(s) used for coloring
out_pdf = save_dir / f"curvature_embedding_{file_suffix}.pdf"
ccd.pl.plot_embedding(
    adata, show_basis, show_cols,
    highlight_indices=None, highlight_size=5, draw_path=False, alpha=1.0,
    font_size=12, point_size=10, path_width=1,
    legend_loc='on data', title='Curvature', colorbar_loc=None,
    rasterized=True, xlabel=None, ylabel=None,
    figsize=(6, 6), dpi=300, save_path=out_pdf,
)
# --- Duplicate render of the previous cell (notebook-to-markdown export artifact). ---
# Plot the curvature values on the UMAP embedding
basis = 'PCA_no_noise'
show_basis = f'{basis}_PCA'
show_cols = [f'curvature_{basis}']
# NOTE(review): despite the comment above, show_basis points at a PCA key, not UMAP.
ccd.pl.plot_embedding(
adata, show_basis, show_cols, highlight_indices=None, highlight_size=5, draw_path=False, alpha=1.0,
font_size=12, point_size=10, path_width=1,
legend_loc='on data', title='Curvature', colorbar_loc=None, rasterized=True, xlabel=None, ylabel=None,
figsize=(6, 6), dpi=300, save_path=save_dir / f"curvature_embedding_{file_suffix}.pdf"
)
In [153]:
Copied!
# Inspect the per-cell curvature column for the current `basis`.
# Second line is a duplicate from the notebook export.
adata.obs[f'curvature_{basis}']
adata.obs[f'curvature_{basis}']
Out[153]:
batch_1_Cell_1 1.432309
batch_1_Cell_3 1.432309
batch_1_Cell_4 1.432309
batch_1_Cell_6 1.432309
batch_1_Cell_7 1.432309
...
batch_2_Cell_990 NaN
batch_2_Cell_991 NaN
batch_2_Cell_992 NaN
batch_2_Cell_996 NaN
batch_2_Cell_997 NaN
Name: curvature_Concord, Length: 1000, dtype: float64
In [178]:
Copied!
# Maximum curvature value observed for the current `basis`.
# Second line is a duplicate from the notebook export.
adata.obs[f'curvature_{basis}'].max()
adata.obs[f'curvature_{basis}'].max()
Out[178]:
8.11891861960433
In [196]:
Copied!
# Re-estimate curvature along the chosen embedding by walking shortest paths
# on a k-NN graph between time-anchored cells, averaging with prior estimates.
import pandas as pd  # not in the notebook's top import cell; required for Series/concat below

#basis = 'PCA_no_noise'
basis = 'Concord'
k = 15
neighborhood = ccd.ul.Neighborhood(adata.obsm[basis], k=k)
show_cols = ['time', f'curvature_{basis}']
adata.obs[f'curvature_{basis}'] = np.nan
show_basis = basis + '_PCA'

# Window (interval) and stride (step) as fractions of the number of cells.
interval_frac = 0.06
interval = int(interval_frac * adata.n_obs)
step_frac = 0.02
step = int(step_frac * adata.n_obs)

time_vec = adata.obs['time']
# NOTE(review): `step`/`interval` are in cell counts while `time_vec` is in
# time units — confirm the two scales are meant to coincide here.
time_points = np.arange(time_vec.min(), time_vec.max(), step)
# Hoist the repeated column lookup out of the loop.
curv_col = adata.obs.columns.get_loc(f'curvature_{basis}')
for t in time_points:
    # Anchor the segment at the cells closest in time to t and t + interval.
    # (Removed dead `start_time = i * step` / `end_time` lines: `i` was never
    # defined in this cell and raised NameError on a fresh kernel.)
    start_point = np.argmin(np.abs(time_vec - t))
    end_point = np.argmin(np.abs(time_vec - (t + interval)))
    path, _ = ccd.ul.shortest_path_on_knn_graph(neighborhood, point_a=start_point, point_b=end_point)
    curvature = ccd.ul.curvature_along_path(adata, basis=basis, path=path)
    # Merge the new estimate with any previous estimate on overlapping cells;
    # pandas' mean skips NaN, so first-time cells just take the new value.
    curvature_new = pd.Series(curvature, index=path)
    curvature_old = pd.Series(adata.obs.iloc[path, curv_col], index=path)
    curvature_df = pd.concat([curvature_old, curvature_new], axis=1)
    adata.obs.iloc[path, curv_col] = curvature_df.mean(axis=1)

ccd.pl.plot_embedding(
    adata, show_basis, show_cols,
    highlight_indices=None, highlight_size=30, draw_path=False, alpha=1.0, ncols=2,
    font_size=12, point_size=5, path_width=0.2,
    legend_loc='on data', title=basis, colorbar_loc=None,
    rasterized=True, xlabel=None, ylabel=None,
    figsize=(6, 3), dpi=300, save_path=None,
)
# --- Duplicate render of the previous cell (notebook-to-markdown export artifact). ---
#basis = 'PCA_no_noise'
basis = 'Concord'
k=15
# k-NN graph over the chosen latent embedding.
neighborhood = ccd.ul.Neighborhood(adata.obsm[basis], k=k)
show_cols = ['time', f'curvature_{basis}']
adata.obs[f'curvature_{basis}'] = np.nan
#adata.obs[f'curvature_{basis}'] = np.linspace(0, 2, adata.n_obs)
show_basis = basis + '_PCA'
# Window (interval) and stride (step) as fractions of the number of cells.
interval_frac = 0.06
interval = int(interval_frac * adata.n_obs)
step_frac = 0.02
step = int(step_frac * adata.n_obs)
time_vec = adata.obs['time']
time_points = np.arange(time_vec.min(), time_vec.max(), step)
for t in time_points:
# Extract scalar positional indices
# NOTE(review): `i` is never defined in this cell — start_time/end_time are
# dead code relying on a leaked loop variable; a fresh kernel raises NameError.
start_time = i * step
end_time = start_time + interval
start_point = np.argmin(np.abs(time_vec - t))
end_point = np.argmin(np.abs(time_vec - (t + interval)))
# Assign curvature values using `.iloc` for position-based slicing
path, _ = ccd.ul.shortest_path_on_knn_graph(neighborhood, point_a=start_point, point_b=end_point)
curvature = ccd.ul.curvature_along_path(adata, basis=basis, path=path)
# NOTE(review): `pd` is not imported in the notebook's import cell — confirm.
curvature_new = pd.Series(curvature, index=path)
curvature_old = pd.Series(adata.obs.iloc[path, adata.obs.columns.get_loc(f'curvature_{basis}')], index=path)
#print("curvature_new", curvature_new)
#print("curvature_old", curvature_old)
# NaN-skipping mean merges the new estimate with any previous overlapping one.
curvature_df = pd.concat([curvature_old, curvature_new], axis=1)
# print("curvature_df", curvature_df)
# print("curvature_df.mean(axis=1)", curvature_df.mean(axis=1))
adata.obs.iloc[path, adata.obs.columns.get_loc(f'curvature_{basis}')] = curvature_df.mean(axis=1)
ccd.pl.plot_embedding(
adata, show_basis, show_cols, highlight_indices=None, highlight_size=30, draw_path=False, alpha=1.0, ncols=2,
font_size=12, point_size=5, path_width=0.2,
legend_loc='on data', title=basis, colorbar_loc=None, rasterized=True, xlabel=None, ylabel=None,
figsize=(6, 3), dpi=300, save_path=None
)
Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - INFO - Building Faiss FlatL2 index. Concord.model.knn - INFO - Using FAISS CPU index. Concord.model.knn - WARNING - K-NN graph is not computed. Computing now. Concord - INFO - Finding path between point_0 and point_30 Concord - INFO - Finding path between point_509 and point_44 Concord - INFO - Finding path between point_19 and point_57 Concord - INFO - Finding path between point_30 and point_67 Concord - INFO - Finding path between point_44 and point_76 Concord - INFO - Finding path between point_57 and point_572 Concord - INFO - Finding path between point_67 and point_580 Concord - INFO - Finding path between point_76 and point_102 Concord - INFO - Finding path between point_572 and point_112 Concord - INFO - Finding path between point_580 and point_122 Concord - INFO - Finding path between point_102 and point_131 Concord - INFO - Finding path between point_112 and point_141 Concord - INFO - Finding path between point_122 and point_153 Concord - INFO - Finding path between point_131 and point_165 Concord - INFO - Finding path between point_141 and point_177 Concord - INFO - Finding path between point_153 and point_188 Concord - INFO - Finding path between point_165 and point_199 Concord - INFO - Finding path between point_177 and point_697 Concord - INFO - Finding path between point_188 and point_216 Concord - INFO - Finding path between point_199 and point_229 Concord - INFO - Finding path between point_697 and point_728 Concord - INFO - Finding path between point_216 and point_739 Concord - INFO - Finding path between point_229 and point_260 Concord - INFO - Finding path between point_728 and point_762 Concord - INFO - Finding path between point_739 and point_284 Concord - INFO - Finding path between point_260 and point_787
Concord - INFO - Finding path between point_762 and point_304 Concord - INFO - Finding path between point_284 and point_320 Concord - INFO - Finding path between point_787 and point_818 Concord - INFO - Finding path between point_304 and point_827 Concord - INFO - Finding path between point_320 and point_348 Concord - INFO - Finding path between point_818 and point_358 Concord - INFO - Finding path between point_827 and point_855 Concord - INFO - Finding path between point_348 and point_867 Concord - INFO - Finding path between point_358 and point_385 Concord - INFO - Finding path between point_855 and point_888 Concord - INFO - Finding path between point_867 and point_399 Concord - INFO - Finding path between point_385 and point_408 Concord - INFO - Finding path between point_888 and point_415 Concord - INFO - Finding path between point_399 and point_921 Concord - INFO - Finding path between point_408 and point_932 Concord - INFO - Finding path between point_415 and point_437 Concord - INFO - Finding path between point_921 and point_449 Concord - INFO - Finding path between point_932 and point_460 Concord - INFO - Finding path between point_437 and point_973 Concord - INFO - Finding path between point_449 and point_481 Concord - INFO - Finding path between point_460 and point_989 Concord - INFO - Finding path between point_973 and point_499 Concord - INFO - Finding path between point_481 and point_499 Concord - INFO - Finding path between point_989 and point_499
In [201]:
Copied!
# Heatmaps of raw expression and of the sorted latent, both annotated with
# state, batch, and the two curvature estimates.
annot_keys = [state_key, batch_key, 'curvature_PCA_no_noise', f'curvature_{basis}']
ccd.pl.heatmap_with_annotations(
    adata, val='X', obs_keys=annot_keys,
    cluster_cols=False, cluster_rows=False, cmap='viridis',
)
ccd.pl.heatmap_with_annotations(
    adata, val=basis + '_sorted', obs_keys=annot_keys,
    cluster_cols=False, cluster_rows=False, cmap='viridis',
)
# --- Duplicate render of the previous cell (notebook-to-markdown export artifact). ---
ccd.pl.heatmap_with_annotations(adata, val='X', obs_keys=[state_key, batch_key, f'curvature_PCA_no_noise', f'curvature_{basis}'],
cluster_cols=False, cluster_rows=False, cmap='viridis')
# Same annotations over the sorted latent dimensions of the current basis.
ccd.pl.heatmap_with_annotations(adata, val=basis+'_sorted', obs_keys=[state_key, batch_key, f'curvature_PCA_no_noise', f'curvature_{basis}'],
cluster_cols=False, cluster_rows=False, cmap='viridis')
Out[201]:
<seaborn.matrix.ClusterGrid at 0x7f848c41a190>
In [ ]:
Copied!
In [ ]:
Copied!